diff options
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-ext.ll (renamed from llvm/test/CodeGen/X86/avx512-trunc-ext.ll) | 26 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics.ll | 390 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-trunc.ll | 364 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bw-intrinsics.ll | 78 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll | 156 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics.ll | 780 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/masked_memop.ll | 11 | ||||
| -rw-r--r-- | llvm/test/MC/X86/x86-64-avx512bw.s | 120 | ||||
| -rw-r--r-- | llvm/test/MC/X86/x86-64-avx512bw_vl.s | 480 | ||||
| -rw-r--r-- | llvm/test/MC/X86/x86-64-avx512f_vl.s | 1200 |
10 files changed, 3576 insertions, 29 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-trunc-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll index f25458972e4..aa1dd4928c3 100644 --- a/llvm/test/CodeGen/X86/avx512-trunc-ext.ll +++ b/llvm/test/CodeGen/X86/avx512-ext.ll @@ -1,24 +1,7 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX - -; KNL-LABEL: trunc_16x32_to_16x8 -; KNL: vpmovdb -; KNL: ret -define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) nounwind readnone { - %x = trunc <16 x i32> %i to <16 x i8> - ret <16 x i8> %x -} - -; KNL-LABEL: trunc_8x64_to_8x16 -; KNL: vpmovqw -; KNL: ret -define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) nounwind readnone { - %x = trunc <8 x i64> %i to <8 x i16> - ret <8 x i16> %x -} - -;SKX-LABEL: zext_8x8mem_to_8x16: + ;SKX-LABEL: zext_8x8mem_to_8x16: ;SKX: ## BB#0: ;SKX-NEXT: vpmovw2m %xmm0, %k1 ;SKX-NEXT: vpmovzxbw (%rdi), %xmm0 {%k1} {z} @@ -895,13 +878,6 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind { ret <8 x i32> %y } -; KNL-LABEL: trunc_v16i32_to_v16i16 -; KNL: vpmovdw -; KNL: ret -define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) { - %1 = trunc <16 x i32> %x to <16 x i16> - ret <16 x i16> %1 -} ; KNL-LABEL: trunc_i32_to_i1 ; KNL: movw $-4, %ax diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index 6e50fda7467..7c30063ce28 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -3119,6 +3119,396 @@ define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 ret <16 x float> %res2 } +declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512: +; CHECK: vpmovqb %zmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovqb %zmm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmov_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_512: +; CHECK: vpmovqb %zmm0, (%rdi) +; CHECK: vpmovqb %zmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512: +; CHECK: vpmovsqb %zmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsqb %zmm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512: +; CHECK: vpmovsqb %zmm0, (%rdi) +; CHECK: vpmovsqb %zmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512: +; CHECK: vpmovusqb %zmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusqb %zmm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512: +; CHECK: vpmovusqb %zmm0, (%rdi) +; CHECK: vpmovusqb %zmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) + ret void +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512: +; CHECK: vpmovqw %zmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovqw %zmm0, %xmm0 + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) + %res3 = add <8 x i16> %res0, %res1 + %res4 = add <8 x i16> %res3, %res2 + ret <8 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmov_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_512: +; CHECK: vpmovqw %zmm0, (%rdi) +; CHECK: vpmovqw %zmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) + ret void +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512: +; CHECK: vpmovsqw %zmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsqw %zmm0, %xmm0 + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) + %res3 = add <8 x i16> %res0, %res1 + %res4 = add <8 x i16> %res3, %res2 + ret <8 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512: +; CHECK: vpmovsqw %zmm0, (%rdi) +; CHECK: vpmovsqw %zmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) + ret void +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512: +; CHECK: vpmovusqw %zmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusqw %zmm0, %xmm0 + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) + %res3 = add <8 x i16> %res0, %res1 + %res4 = add <8 x i16> %res3, %res2 + ret <8 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512: +; CHECK: vpmovusqw %zmm0, (%rdi) +; CHECK: vpmovusqw %zmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) + ret void +} + +declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8) + +define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512: +; CHECK: vpmovqd %zmm0, %ymm1 {%k1} +; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpmovqd %zmm0, %ymm0 + %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) + %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) + %res3 = add <8 x i32> %res0, %res1 + %res4 = add <8 x i32> %res3, %res2 + ret <8 x i32> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmov_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_512: +; CHECK: vpmovqd %zmm0, (%rdi) +; CHECK: vpmovqd %zmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) + ret void +} + +declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8) + +define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512: +; CHECK: vpmovsqd %zmm0, %ymm1 {%k1} +; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpmovsqd %zmm0, %ymm0 + %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) + %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) + %res3 = add <8 x i32> %res0, %res1 + %res4 = add <8 x i32> %res3, %res2 + ret <8 x i32> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512: +; CHECK: vpmovsqd %zmm0, (%rdi) +; CHECK: vpmovsqd %zmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) + ret void +} + +declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8) + +define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512: +; CHECK: vpmovusqd %zmm0, %ymm1 {%k1} +; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpmovusqd %zmm0, %ymm0 + %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) + %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) + %res3 = add <8 x i32> %res0, %res1 + %res4 = add <8 x i32> %res3, %res2 + ret <8 x i32> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512: +; CHECK: vpmovusqd %zmm0, (%rdi) +; CHECK: vpmovusqd %zmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16) + +define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512: +; CHECK: vpmovdb %zmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovdb %zmm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32>, i16) + +define void @test_int_x86_avx512_mask_pmov_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_512: +; CHECK: vpmovdb %zmm0, (%rdi) +; CHECK: vpmovdb %zmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) + call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16) + +define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512: +; CHECK: vpmovsdb %zmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsdb %zmm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16) + +define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512: +; CHECK: vpmovsdb %zmm0, (%rdi) +; CHECK: vpmovsdb %zmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) + call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16) + +define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512: +; CHECK: vpmovusdb %zmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusdb %zmm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16) + +define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512: +; CHECK: vpmovusdb %zmm0, (%rdi) +; CHECK: vpmovusdb %zmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) + call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) + ret void +} + +declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16) + +define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512: +; CHECK: vpmovdw %zmm0, %ymm1 {%k1} +; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpmovdw %zmm0, %ymm0 + %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) + %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) + %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2) + %res3 = add <16 x i16> %res0, %res1 + %res4 = add <16 x i16> %res3, %res2 + ret <16 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32>, i16) + +define void @test_int_x86_avx512_mask_pmov_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_512: +; CHECK: vpmovdw %zmm0, (%rdi) +; CHECK: vpmovdw %zmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) + call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) + ret void +} + +declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16) + +define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512: +; CHECK: vpmovsdw %zmm0, %ymm1 {%k1} +; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpmovsdw %zmm0, %ymm0 + %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) + %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) + %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2) + %res3 = add <16 x i16> %res0, %res1 + %res4 = add <16 x i16> %res3, %res2 + ret <16 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16) + +define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512: +; CHECK: vpmovsdw %zmm0, (%rdi) +; CHECK: vpmovsdw %zmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) + call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) + ret void +} + +declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16) + +define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512: +; CHECK: vpmovusdw %zmm0, %ymm1 {%k1} +; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpmovusdw %zmm0, %ymm0 + %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) + %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) + %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2) + %res3 = add <16 x i16> %res0, %res1 + %res4 = add <16 x i16> %res3, %res2 + ret <16 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16) + +define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512: +; CHECK: vpmovusdw %zmm0, (%rdi) +; CHECK: vpmovusdw %zmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) + call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) + ret void +} + declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8) define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) { diff --git a/llvm/test/CodeGen/X86/avx512-trunc.ll b/llvm/test/CodeGen/X86/avx512-trunc.ll new file mode 100644 index 00000000000..9205feda7eb --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512-trunc.ll @@ -0,0 +1,364 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX + + attributes #0 = { nounwind } + +; KNL-LABEL: trunc_16x32_to_16x8 +; KNL: vpmovdb +; KNL: ret +define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) #0 { + %x = trunc <16 x i32> %i to <16 x i8> + ret <16 x i8> %x +} + +; KNL-LABEL: trunc_8x64_to_8x16 +; KNL: vpmovqw +; KNL: ret +define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) #0 { + %x = trunc <8 x i64> %i to <8 x i16> + ret <8 x i16> %x +} + +; KNL-LABEL: trunc_v16i32_to_v16i16 +; KNL: vpmovdw +; KNL: ret +define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) #0 { + %1 = trunc <16 x i32> %x to <16 x i16> + ret <16 x i16> %1 +} + +define <8 x i8> @trunc_qb_512(<8 x i64> %i) #0 { +; SKX-LABEL: trunc_qb_512: +; SKX: ## BB#0: +; SKX-NEXT: vpmovqw %zmm0, %xmm0 +; SKX-NEXT: retq + %x = trunc <8 x i64> %i to <8 x i8> + ret <8 x i8> %x +} + +define void @trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) #0 { +; SKX-LABEL: trunc_qb_512_mem: +; SKX: ## BB#0: +; SKX-NEXT: vpmovqb %zmm0, (%rdi) +; SKX-NEXT: retq + %x = trunc <8 x i64> %i to <8 x i8> + store <8 x i8> %x, <8 x i8>* %res + ret void +} + +define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 { +; SKX-LABEL: trunc_qb_256: +; SKX: ## BB#0: +; SKX-NEXT: vpmovqd %ymm0, %xmm0 +; SKX-NEXT: retq + %x = trunc <4 x i64> %i to <4 x i8> + ret <4 x i8> %x +} + +define void @trunc_qb_256_mem(<4 x i64> %i, <4 x i8>* %res) #0 { +; SKX-LABEL: trunc_qb_256_mem: +; SKX: ## BB#0: +; SKX-NEXT: vpmovqb %ymm0, (%rdi) +; SKX-NEXT: retq + %x = trunc <4 x i64> %i to <4 x i8> + store <4 x i8> %x, <4 x i8>* %res + ret void +} + +define <2 x i8> @trunc_qb_128(<2 x i64> %i) #0 { +; SKX-LABEL: trunc_qb_128: +; SKX: ## BB#0: +; SKX-NEXT: retq + %x = trunc <2 x i64> %i to <2 x i8> + ret <2 x i8> %x +} + +define void @trunc_qb_128_mem(<2 x i64> %i, <2 x i8>* %res) #0 { +; SKX-LABEL: trunc_qb_128_mem: +; SKX: ## BB#0: +; SKX-NEXT: vpmovqb %xmm0, (%rdi) +; SKX-NEXT: retq + %x = trunc <2 x i64> %i to <2 x i8> + store <2 x i8> %x, <2 x i8>* %res + ret void +} + +define <8 x i16> @trunc_qw_512(<8 x i64> %i) #0 { +; SKX-LABEL: trunc_qw_512: +; SKX: ## BB#0: +; SKX-NEXT: vpmovqw %zmm0, %xmm0 +; SKX-NEXT: retq + %x = trunc <8 x i64> %i to <8 x i16> + ret <8 x i16> %x +} + +define void @trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) #0 { +; SKX-LABEL: trunc_qw_512_mem: +; SKX: ## BB#0: +; SKX-NEXT: vpmovqw %zmm0, (%rdi) +; SKX-NEXT: retq + %x = trunc <8 x i64> %i to <8 x i16> + store <8 x i16> %x, <8 x i16>* %res + ret void +} + +define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 { +; SKX-LABEL: trunc_qw_256: +; SKX: ## BB#0: +; SKX-NEXT: vpmovqd %ymm0, %xmm0 +; SKX-NEXT: retq + %x = trunc <4 x i64> %i to <4 x i16> + ret <4 x i16> %x +} + +define void @trunc_qw_256_mem(<4 x i64> %i, <4 x i16>* %res) #0 { +; SKX-LABEL: trunc_qw_256_mem: +; SKX: ## BB#0: +; SKX-NEXT: vpmovqw %ymm0, (%rdi) +; SKX-NEXT: retq + %x = trunc <4 x i64> %i to <4 x i16> + store <4 x i16> %x, <4 x i16>* %res + ret void +} + +define <2 x i16> @trunc_qw_128(<2 x i64> %i) #0 { +; SKX-LABEL: trunc_qw_128: +; SKX: ## BB#0: +; SKX-NEXT: retq + %x = trunc <2 x i64> %i to <2 x i16> + ret <2 x i16> %x +} + +define void @trunc_qw_128_mem(<2 x i64> %i, <2 x i16>* %res) #0 { +; SKX-LABEL: trunc_qw_128_mem: +; SKX: ## BB#0: +; SKX-NEXT: vpmovqw %xmm0, (%rdi) +; SKX-NEXT: retq + %x = trunc <2 x i64> %i to <2 x i16> + store <2 x i16> %x, <2 x i16>* %res + ret void +} + +define <8 x i32> @trunc_qd_512(<8 x i64> %i) #0 { +; SKX-LABEL: trunc_qd_512: +; SKX: ## BB#0: +; SKX-NEXT: vpmovqd %zmm0, %ymm0 +; SKX-NEXT: retq + %x = trunc <8 x i64> %i to <8 x i32> + ret <8 x i32> %x +} + +define void @trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) #0 { +; SKX-LABEL: trunc_qd_512_mem: +; SKX: ## BB#0: +; SKX-NEXT: vpmovqd %zmm0, (%rdi) +; SKX-NEXT: retq + %x = trunc <8 x i64> %i to <8 x i32> + store <8 x i32> %x, <8 x i32>* %res + ret void +} + +define <4 x i32> @trunc_qd_256(<4 x i64> %i) #0 { +; SKX-LABEL: trunc_qd_256: +; SKX: ## BB#0: +; SKX-NEXT: vpmovqd %ymm0, %xmm0 +; SKX-NEXT: retq + %x = trunc <4 x i64> %i to <4 x i32> + ret <4 x i32> %x +} + +define void @trunc_qd_256_mem(<4 x i64> %i, <4 x i32>* %res) #0 { +; SKX-LABEL: trunc_qd_256_mem: +; SKX: ## BB#0: +; SKX-NEXT: vpmovqd %ymm0, (%rdi) +; SKX-NEXT: retq + %x = trunc <4 x i64> %i to <4 x i32> + store <4 x i32> %x, <4 x i32>* %res + ret void +} + +define <2 x i32> @trunc_qd_128(<2 x i64> %i) #0 { +; SKX-LABEL: trunc_qd_128: +; SKX: ## BB#0: +; SKX-NEXT: retq + %x = trunc <2 x i64> %i to <2 x i32> + ret <2 x i32> %x +} + +define void @trunc_qd_128_mem(<2 x i64> %i, <2 x i32>* %res) #0 { +; SKX-LABEL: trunc_qd_128_mem: +; SKX: ## BB#0: +; SKX-NEXT: vpmovqd %xmm0, (%rdi) +; SKX-NEXT: retq + %x = trunc <2 x i64> %i to <2 x i32> + store <2 x i32> %x, <2 x i32>* %res + ret void +} + +define <16 x i8> @trunc_db_512(<16 x i32> %i) #0 { +; SKX-LABEL: trunc_db_512: +; SKX: ## BB#0: +; SKX-NEXT: vpmovdb %zmm0, %xmm0 +; SKX-NEXT: retq + %x = trunc <16 x i32> %i to <16 x i8> + ret <16 x i8> %x +} + +define void @trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) #0 { +; SKX-LABEL: trunc_db_512_mem: +; SKX: ## BB#0: +; SKX-NEXT: vpmovdb %zmm0, (%rdi) +; SKX-NEXT: retq + %x = trunc <16 x i32> %i to <16 x i8> + store <16 x i8> %x, <16 x i8>* %res + ret void +} + +define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 { +; SKX-LABEL: trunc_db_256: +; SKX: ## BB#0: +; SKX-NEXT: vpmovdw %ymm0, %xmm0 +; SKX-NEXT: retq + %x = trunc <8 x i32> %i to <8 x i8> + ret <8 x i8> %x +} + +define void @trunc_db_256_mem(<8 x i32> %i, <8 x i8>* %res) #0 { +; SKX-LABEL: trunc_db_256_mem: +; SKX: ## BB#0: +; SKX-NEXT: vpmovdb %ymm0, (%rdi) +; SKX-NEXT: retq + %x = trunc <8 x i32> %i to <8 x i8> + store <8 x i8> %x, <8 x i8>* %res + ret void +} + +define <4 x i8> @trunc_db_128(<4 x i32> %i) #0 { +; SKX-LABEL: trunc_db_128: +; SKX: ## BB#0: +; SKX-NEXT: retq + %x = trunc <4 x i32> %i to <4 x i8> + ret <4 x i8> %x +} + +define void @trunc_db_128_mem(<4 x i32> %i, <4 x i8>* %res) #0 { +; SKX-LABEL: trunc_db_128_mem: +; SKX: ## BB#0: +; SKX-NEXT: vpmovdb %xmm0, (%rdi) +; SKX-NEXT: retq + %x = trunc <4 x i32> %i to <4 x i8> + store <4 x i8> %x, <4 x i8>* %res + ret void +} + +define <16 x i16> @trunc_dw_512(<16 x i32> %i) #0 { +; SKX-LABEL: trunc_dw_512: +; SKX: ## BB#0: +; SKX-NEXT: vpmovdw %zmm0, %ymm0 +; SKX-NEXT: retq + %x = trunc <16 x i32> %i to <16 x i16> + ret <16 x i16> %x +} + +define void @trunc_dw_512_mem(<16 x i32> %i, <16 x i16>* %res) #0 { +; SKX-LABEL: trunc_dw_512_mem: +; SKX: ## BB#0: +; SKX-NEXT: vpmovdw %zmm0, (%rdi) +; SKX-NEXT: retq + %x = trunc <16 x i32> %i to <16 x i16> + store <16 x i16> %x, <16 x i16>* %res + ret void +} + +define <8 x i16> @trunc_dw_256(<8 x i32> %i) #0 { +; SKX-LABEL: trunc_dw_256: +; SKX: ## BB#0: +; SKX-NEXT: vpmovdw %ymm0, %xmm0 +; SKX-NEXT: retq + %x = trunc <8 x i32> %i to <8 x i16> + ret <8 x i16> %x +} + +define void @trunc_dw_256_mem(<8 x i32> %i, <8 x i16>* %res) #0 { +; SKX-LABEL: trunc_dw_256_mem: +; SKX: ## BB#0: +; SKX-NEXT: vpmovdw %ymm0, (%rdi) +; SKX-NEXT: retq + %x = trunc <8 x i32> %i to <8 x i16> + store <8 x i16> %x, <8 x i16>* %res + ret void +} + +define <4 x i16> @trunc_dw_128(<4 x i32> %i) #0 { +; SKX-LABEL: trunc_dw_128: +; SKX: ## BB#0: +; SKX-NEXT: retq + %x = trunc <4 x i32> %i to <4 x i16> + ret <4 x i16> %x +} + +define void @trunc_dw_128_mem(<4 x i32> %i, <4 x i16>* %res) #0 { +; SKX-LABEL: trunc_dw_128_mem: +; SKX: ## BB#0: +; SKX-NEXT: vpmovdw %xmm0, (%rdi) +; SKX-NEXT: retq + %x = trunc <4 x i32> %i to <4 x i16> + store <4 x i16> %x, <4 x i16>* %res + ret void +} + +define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 { +; SKX-LABEL: trunc_wb_512: +; SKX: ## BB#0: +; SKX-NEXT: vpmovwb %zmm0, %ymm0 +; SKX-NEXT: retq + %x = trunc <32 x i16> %i to <32 x i8> + ret <32 x i8> %x +} + +define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 { +; SKX-LABEL: trunc_wb_512_mem: +; SKX: ## BB#0: +; SKX-NEXT: vpmovwb %zmm0, (%rdi) +; SKX-NEXT: retq + %x = trunc <32 x i16> %i to <32 x i8> + store <32 x i8> %x, <32 x i8>* %res + ret void +} + +define <16 x i8> @trunc_wb_256(<16 x i16> %i) #0 { +; SKX-LABEL: trunc_wb_256: +; SKX: ## BB#0: +; SKX-NEXT: vpmovwb %ymm0, %xmm0 +; SKX-NEXT: retq + %x = trunc <16 x i16> %i to <16 x i8> + ret <16 x i8> %x +} + +define void @trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) #0 { +; SKX-LABEL: trunc_wb_256_mem: +; SKX: ## BB#0: +; SKX-NEXT: vpmovwb %ymm0, (%rdi) +; SKX-NEXT: retq + %x = trunc <16 x i16> %i to <16 x i8> + store <16 x i8> %x, <16 x i8>* %res + ret void +} + +define <8 x i8> @trunc_wb_128(<8 x i16> %i) #0 { +; SKX-LABEL: trunc_wb_128: +; SKX: ## BB#0: +; SKX-NEXT: retq + %x = trunc <8 x i16> %i to <8 x i8> + ret <8 x i8> %x +} + +define void @trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) #0 { +; SKX-LABEL: trunc_wb_128_mem: +; SKX: ## BB#0: +; SKX-NEXT: vpmovwb %xmm0, (%rdi) +; SKX-NEXT: retq + %x = trunc <8 x i16> %i to <8 x i8> + store <8 x i8> %x, <8 x i8>* %res + ret void +} diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll index b2b417df2f1..5ad28ab5ab5 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1008,6 +1008,84 @@ define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i ret <32 x i16> %res2 } +declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32) + +define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_512: +; CHECK: vpmovwb %zmm0, %ymm1 {%k1} +; CHECK-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpmovwb %zmm0, %ymm0 + %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) + %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) + %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) + %res3 = add <32 x i8> %res0, %res1 + %res4 = add <32 x i8> %res3, %res2 + ret <32 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32) + +define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512: +; CHECK: vpmovwb %zmm0, (%rdi) +; CHECK: vpmovwb %zmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) + call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) + ret void +} + +declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32) + +define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_512: +; CHECK: vpmovswb %zmm0, %ymm1 {%k1} +; CHECK-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpmovswb %zmm0, %ymm0 + %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) + %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) + %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) + %res3 = add <32 x i8> %res0, %res1 + %res4 = add <32 x i8> %res3, %res2 + ret <32 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32) + +define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512: +; CHECK: vpmovswb %zmm0, (%rdi) +; CHECK: vpmovswb %zmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) + call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) + ret void +} + +declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32) + +define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_512: +; CHECK: vpmovuswb %zmm0, %ymm1 {%k1} +; CHECK-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpmovuswb %zmm0, %ymm0 + %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) + %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) + %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) + %res3 = add <32 x i8> %res0, %res1 + %res4 = add <32 x i8> %res3, %res2 + ret <32 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32) + +define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512: +; CHECK: vpmovuswb %zmm0, (%rdi) +; CHECK: vpmovuswb %zmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) + call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) + ret void +} + declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32) define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) { diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 2373dc089ae..ee76ae2a8a3 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -3876,6 +3876,162 @@ define <16 x i16>@test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i ret <16 x i16> %res2 } +declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_128: +; CHECK: vpmovwb %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovwb %xmm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16>, i8) + +define void @test_int_x86_avx512_mask_pmov_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128: +; CHECK: vpmovwb %xmm0, (%rdi) +; CHECK: vpmovwb %xmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_128: +; CHECK: vpmovswb %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovswb %xmm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16>, i8) + +define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128: +; CHECK: vpmovswb %xmm0, (%rdi) +; CHECK: vpmovswb %xmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_128: +; CHECK: vpmovuswb %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovuswb %xmm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16>, i8) + +define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128: +; CHECK: vpmovuswb %xmm0, (%rdi) +; CHECK: vpmovuswb %xmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16>, <16 x i8>, i16) + +define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_256: +; CHECK: vpmovwb %ymm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovwb %ymm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovwb %ymm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16>, i16) + +define void @test_int_x86_avx512_mask_pmov_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256: +; CHECK: vpmovwb %ymm0, (%rdi) +; CHECK: vpmovwb %ymm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1) + call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16>, <16 x i8>, i16) + +define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_256: +; CHECK: vpmovswb %ymm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovswb %ymm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovswb %ymm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16>, i16) + +define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256: +; CHECK: vpmovswb %ymm0, (%rdi) +; CHECK: vpmovswb %ymm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1) + call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16>, <16 x i8>, i16) + +define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_256: +; CHECK: vpmovuswb %ymm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovuswb %ymm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovuswb %ymm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16>, i16) + +define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256: +; CHECK: vpmovuswb %ymm0, (%rdi) +; CHECK: vpmovuswb %ymm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1) + call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2) + ret void +} + declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8) define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) { diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index 46ee51f47b6..7812148de1c 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -3005,6 +3005,786 @@ define <8 x float>@test_int_x86_avx512_mask_scalef_ps_256(<8 x float> %x0, <8 x ret <8 x float> %res2 } +declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_128: +; CHECK: vpmovqb %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovqb %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovqb %xmm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmov_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_128: +; CHECK: vpmovqb %xmm0, (%rdi) +; CHECK: vpmovqb %xmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_128: +; CHECK: vpmovsqb %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovsqb %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsqb %xmm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovs_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128: +; CHECK: vpmovsqb %xmm0, (%rdi) +; CHECK: vpmovsqb %xmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_128: +; CHECK: vpmovusqb %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovusqb %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusqb %xmm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovus_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128: +; CHECK: vpmovusqb %xmm0, (%rdi) +; CHECK: vpmovusqb %xmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_256: +; CHECK: vpmovqb %ymm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovqb %ymm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovqb %ymm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmov_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_256: +; CHECK: vpmovqb %ymm0, (%rdi) +; CHECK: vpmovqb %ymm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_256: +; CHECK: vpmovsqb %ymm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovsqb %ymm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsqb %ymm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovs_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256: +; CHECK: vpmovsqb %ymm0, (%rdi) +; CHECK: vpmovsqb %ymm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_256: +; CHECK: vpmovusqb %ymm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovusqb %ymm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusqb %ymm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovus_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256: +; CHECK: vpmovusqb %ymm0, (%rdi) +; CHECK: vpmovusqb %ymm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) + ret void +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_128: +; CHECK: vpmovqw %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovqw %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovqw %xmm0, %xmm0 + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) + %res3 = add <8 x i16> %res0, %res1 + %res4 = add <8 x i16> %res3, %res2 + ret <8 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmov_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_128: +; CHECK: vpmovqw %xmm0, (%rdi) +; CHECK: vpmovqw %xmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) + ret void +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_128: +; CHECK: vpmovsqw %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovsqw %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsqw %xmm0, %xmm0 + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) + %res3 = add <8 x i16> %res0, %res1 + %res4 = add <8 x i16> %res3, %res2 + ret <8 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovs_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128: +; CHECK: vpmovsqw %xmm0, (%rdi) +; CHECK: vpmovsqw %xmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) + ret void +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_128: +; CHECK: vpmovusqw %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovusqw %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusqw %xmm0, %xmm0 + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) + %res3 = add <8 x i16> %res0, %res1 + %res4 = add <8 x i16> %res3, %res2 + ret <8 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovus_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128: +; CHECK: vpmovusqw %xmm0, (%rdi) +; CHECK: vpmovusqw %xmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) + ret void +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_256: +; CHECK: vpmovqw %ymm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovqw %ymm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovqw %ymm0, %xmm0 + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) + %res3 = add <8 x i16> %res0, %res1 + %res4 = add <8 x i16> %res3, %res2 + ret <8 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmov_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_256: +; CHECK: vpmovqw %ymm0, (%rdi) +; CHECK: vpmovqw %ymm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) + ret void +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_256: +; CHECK: vpmovsqw %ymm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovsqw %ymm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsqw %ymm0, %xmm0 + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) + %res3 = add <8 x i16> %res0, %res1 + %res4 = add <8 x i16> %res3, %res2 + ret <8 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovs_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256: +; CHECK: vpmovsqw %ymm0, (%rdi) +; CHECK: vpmovsqw %ymm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) + ret void +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_256: +; CHECK: vpmovusqw %ymm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovusqw %ymm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusqw %ymm0, %xmm0 + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) + %res3 = add <8 x i16> %res0, %res1 + %res4 = add <8 x i16> %res3, %res2 + ret <8 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovus_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256: +; CHECK: vpmovusqw %ymm0, (%rdi) +; CHECK: vpmovusqw %ymm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) + ret void +} + +declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64>, <4 x i32>, i8) + +define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_128: +; CHECK: vpmovqd %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovqd %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovqd %xmm0, %xmm0 + %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) + %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) + %res3 = add <4 x i32> %res0, %res1 + %res4 = add <4 x i32> %res3, %res2 + ret <4 x i32> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmov_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_128: +; CHECK: vpmovqd %xmm0, (%rdi) +; CHECK: vpmovqd %xmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) + ret void +} + +declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64>, <4 x i32>, i8) + +define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_128: +; CHECK: vpmovsqd %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovsqd %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsqd %xmm0, %xmm0 + %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) + %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) + %res3 = add <4 x i32> %res0, %res1 + %res4 = add <4 x i32> %res3, %res2 + ret <4 x i32> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovs_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128: +; CHECK: vpmovsqd %xmm0, (%rdi) +; CHECK: vpmovsqd %xmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) + ret void +} + +declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64>, <4 x i32>, i8) + +define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_128: +; CHECK: vpmovusqd %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovusqd %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusqd %xmm0, %xmm0 + %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) + %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) + %res3 = add <4 x i32> %res0, %res1 + %res4 = add <4 x i32> %res3, %res2 + ret <4 x i32> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovus_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128: +; CHECK: vpmovusqd %xmm0, (%rdi) +; CHECK: vpmovusqd %xmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) + ret void +} + +declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64>, <4 x i32>, i8) + +define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_256: +; CHECK: vpmovqd %ymm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovqd %ymm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovqd %ymm0, %xmm0 + %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) + %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) + %res3 = add <4 x i32> %res0, %res1 + %res4 = add <4 x i32> %res3, %res2 + ret <4 x i32> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmov_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_256: +; CHECK: vpmovqd %ymm0, (%rdi) +; CHECK: vpmovqd %ymm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) + ret void +} + +declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64>, <4 x i32>, i8) + +define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_256: +; CHECK: vpmovsqd %ymm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovsqd %ymm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsqd %ymm0, %xmm0 + %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) + %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) + %res3 = add <4 x i32> %res0, %res1 + %res4 = add <4 x i32> %res3, %res2 + ret <4 x i32> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovs_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256: +; CHECK: vpmovsqd %ymm0, (%rdi) +; CHECK: vpmovsqd %ymm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) + ret void +} + +declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64>, <4 x i32>, i8) + +define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_256: +; CHECK: vpmovusqd %ymm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovusqd %ymm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusqd %ymm0, %xmm0 + %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) + %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) + %res3 = add <4 x i32> %res0, %res1 + %res4 = add <4 x i32> %res3, %res2 + ret <4 x i32> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovus_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256: +; CHECK: vpmovusqd %ymm0, (%rdi) +; CHECK: vpmovusqd %ymm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmov_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_128: +; CHECK: vpmovdb %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovdb %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovdb %xmm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32>, i8) + +define void @test_int_x86_avx512_mask_pmov_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_128: +; CHECK: vpmovdb %xmm0, (%rdi) +; CHECK: vpmovdb %xmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_128: +; CHECK: vpmovsdb %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovsdb %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsdb %xmm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32>, i8) + +define void @test_int_x86_avx512_mask_pmovs_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128: +; CHECK: vpmovsdb %xmm0, (%rdi) +; CHECK: vpmovsdb %xmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_128: +; CHECK: vpmovusdb %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovusdb %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusdb %xmm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32>, i8) + +define void @test_int_x86_avx512_mask_pmovus_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128: +; CHECK: vpmovusdb %xmm0, (%rdi) +; CHECK: vpmovusdb %xmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmov_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_256: +; CHECK: vpmovdb %ymm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovdb %ymm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovdb %ymm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32>, i8) + +define void @test_int_x86_avx512_mask_pmov_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_256: +; CHECK: vpmovdb %ymm0, (%rdi) +; CHECK: vpmovdb %ymm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_256: +; CHECK: vpmovsdb %ymm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovsdb %ymm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsdb %ymm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32>, i8) + +define void @test_int_x86_avx512_mask_pmovs_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256: +; CHECK: vpmovsdb %ymm0, (%rdi) +; CHECK: vpmovsdb %ymm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_256: +; CHECK: vpmovusdb %ymm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovusdb %ymm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusdb %ymm0, %xmm0 + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32>, i8) + +define void @test_int_x86_avx512_mask_pmovus_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256: +; CHECK: vpmovusdb %ymm0, (%rdi) +; CHECK: vpmovusdb %ymm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) + ret void +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_128: +; CHECK: vpmovdw %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovdw %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovdw %xmm0, %xmm0 + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) + %res3 = add <8 x i16> %res0, %res1 + %res4 = add <8 x i16> %res3, %res2 + ret <8 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32>, i8) + +define void @test_int_x86_avx512_mask_pmov_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_128: +; CHECK: vpmovdw %xmm0, (%rdi) +; CHECK: vpmovdw %xmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) + ret void +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_128: +; CHECK: vpmovsdw %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovsdw %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsdw %xmm0, %xmm0 + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) + %res3 = add <8 x i16> %res0, %res1 + %res4 = add <8 x i16> %res3, %res2 + ret <8 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32>, i8) + +define void @test_int_x86_avx512_mask_pmovs_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128: +; CHECK: vpmovsdw %xmm0, (%rdi) +; CHECK: vpmovsdw %xmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) + ret void +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_128: +; CHECK: vpmovusdw %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovusdw %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusdw %xmm0, %xmm0 + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) + %res3 = add <8 x i16> %res0, %res1 + %res4 = add <8 x i16> %res3, %res2 + ret <8 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32>, i8) + +define void @test_int_x86_avx512_mask_pmovus_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128: +; CHECK: vpmovusdw %xmm0, (%rdi) +; CHECK: vpmovusdw %xmm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) + ret void +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_256: +; CHECK: vpmovdw %ymm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovdw %ymm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovdw %ymm0, %xmm0 + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) + %res3 = add <8 x i16> %res0, %res1 + %res4 = add <8 x i16> %res3, %res2 + ret <8 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32>, i8) + +define void @test_int_x86_avx512_mask_pmov_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_256: +; CHECK: vpmovdw %ymm0, (%rdi) +; CHECK: vpmovdw %ymm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) + ret void +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_256: +; CHECK: vpmovsdw %ymm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovsdw %ymm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsdw %ymm0, %xmm0 + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) + %res3 = add <8 x i16> %res0, %res1 + %res4 = add <8 x i16> %res3, %res2 + ret <8 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32>, i8) + +define void @test_int_x86_avx512_mask_pmovs_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256: +; CHECK: vpmovsdw %ymm0, (%rdi) +; CHECK: vpmovsdw %ymm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) + ret void +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_256: +; CHECK: vpmovusdw %ymm0, %xmm1 {%k1} +; CHECK-NEXT: vpmovusdw %ymm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusdw %ymm0, %xmm0 + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) + %res3 = add <8 x i16> %res0, %res1 + %res4 = add <8 x i16> %res3, %res2 + ret <8 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32>, i8) + +define void @test_int_x86_avx512_mask_pmovus_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256: +; CHECK: vpmovusdw %ymm0, (%rdi) +; CHECK: vpmovusdw %ymm0, (%rdi) {%k1} + call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) + ret void +} + declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8) define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) { diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll index 6c16e634a59..f51d4fa103e 100644 --- a/llvm/test/CodeGen/X86/masked_memop.ll +++ b/llvm/test/CodeGen/X86/masked_memop.ll @@ -190,10 +190,13 @@ define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) { ; AVX2-LABEL: test15 ; AVX2: vpmaskmovd -; SKX-LABEL: test15 -; SKX: kshiftl -; SKX: kshiftr -; SKX: vmovdqu32 {{.*}}{%k1} +; SKX-LABEL: test15: +; SKX: ## BB#0: +; SKX-NEXT: vpandq {{.*}}(%rip), %xmm0, %xmm0 +; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1 +; SKX-NEXT: vpmovqd %xmm1, (%rdi) {%k1} +; SKX-NEXT: retq define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) { %mask = icmp eq <2 x i32> %trigger, zeroinitializer call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask) diff --git a/llvm/test/MC/X86/x86-64-avx512bw.s b/llvm/test/MC/X86/x86-64-avx512bw.s index 8d72b086a6c..9e10ca42c3b 100644 --- a/llvm/test/MC/X86/x86-64-avx512bw.s +++ b/llvm/test/MC/X86/x86-64-avx512bw.s @@ -3668,6 +3668,126 @@ // CHECK: encoding: [0x62,0x62,0x7d,0x48,0x1d,0xb2,0xc0,0xdf,0xff,0xff] vpabsw -8256(%rdx), %zmm30 +// CHECK: vpmovwb %zmm27, %ymm22 +// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x30,0xde] + vpmovwb %zmm27, %ymm22 + +// CHECK: vpmovwb %zmm27, %ymm22 {%k1} +// CHECK: encoding: [0x62,0x22,0x7e,0x49,0x30,0xde] + vpmovwb %zmm27, %ymm22 {%k1} + +// CHECK: vpmovwb %zmm27, %ymm22 {%k1} {z} +// CHECK: encoding: [0x62,0x22,0x7e,0xc9,0x30,0xde] + vpmovwb %zmm27, %ymm22 {%k1} {z} + +// CHECK: vpmovwb %zmm22, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0x31] + vpmovwb %zmm22, (%rcx) + +// CHECK: vpmovwb %zmm22, (%rcx) {%k4} +// CHECK: encoding: [0x62,0xe2,0x7e,0x4c,0x30,0x31] + vpmovwb %zmm22, (%rcx) {%k4} + +// CHECK: vpmovwb %zmm22, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x48,0x30,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpmovwb %zmm22, 291(%rax,%r14,8) + +// CHECK: vpmovwb %zmm22, 4064(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0x72,0x7f] + vpmovwb %zmm22, 4064(%rdx) + +// CHECK: vpmovwb %zmm22, 4096(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0xb2,0x00,0x10,0x00,0x00] + vpmovwb %zmm22, 4096(%rdx) + +// CHECK: vpmovwb %zmm22, -4096(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0x72,0x80] + vpmovwb %zmm22, -4096(%rdx) + +// CHECK: vpmovwb %zmm22, -4128(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0xb2,0xe0,0xef,0xff,0xff] + vpmovwb %zmm22, -4128(%rdx) + +// CHECK: vpmovswb %zmm18, %ymm23 +// CHECK: encoding: [0x62,0xa2,0x7e,0x48,0x20,0xd7] + vpmovswb %zmm18, %ymm23 + +// CHECK: vpmovswb %zmm18, %ymm23 {%k2} +// CHECK: encoding: [0x62,0xa2,0x7e,0x4a,0x20,0xd7] + vpmovswb %zmm18, %ymm23 {%k2} + +// CHECK: vpmovswb %zmm18, %ymm23 {%k2} {z} +// CHECK: encoding: [0x62,0xa2,0x7e,0xca,0x20,0xd7] + vpmovswb %zmm18, %ymm23 {%k2} {z} + +// CHECK: vpmovswb %zmm24, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x01] + vpmovswb %zmm24, (%rcx) + +// CHECK: vpmovswb %zmm24, (%rcx) {%k7} +// CHECK: encoding: [0x62,0x62,0x7e,0x4f,0x20,0x01] + vpmovswb %zmm24, (%rcx) {%k7} + +// CHECK: vpmovswb %zmm24, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x20,0x84,0xf0,0x23,0x01,0x00,0x00] + vpmovswb %zmm24, 291(%rax,%r14,8) + +// CHECK: vpmovswb %zmm24, 4064(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x42,0x7f] + vpmovswb %zmm24, 4064(%rdx) + +// CHECK: vpmovswb %zmm24, 4096(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x82,0x00,0x10,0x00,0x00] + vpmovswb %zmm24, 4096(%rdx) + +// CHECK: vpmovswb %zmm24, -4096(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x42,0x80] + vpmovswb %zmm24, -4096(%rdx) + +// CHECK: vpmovswb %zmm24, -4128(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x82,0xe0,0xef,0xff,0xff] + vpmovswb %zmm24, -4128(%rdx) + +// CHECK: vpmovuswb %zmm22, %ymm28 +// CHECK: encoding: [0x62,0x82,0x7e,0x48,0x10,0xf4] + vpmovuswb %zmm22, %ymm28 + +// CHECK: vpmovuswb %zmm22, %ymm28 {%k3} +// CHECK: encoding: [0x62,0x82,0x7e,0x4b,0x10,0xf4] + vpmovuswb %zmm22, %ymm28 {%k3} + +// CHECK: vpmovuswb %zmm22, %ymm28 {%k3} {z} +// CHECK: encoding: [0x62,0x82,0x7e,0xcb,0x10,0xf4] + vpmovuswb %zmm22, %ymm28 {%k3} {z} + +// CHECK: vpmovuswb %zmm27, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x19] + vpmovuswb %zmm27, (%rcx) + +// CHECK: vpmovuswb %zmm27, (%rcx) {%k2} +// CHECK: encoding: [0x62,0x62,0x7e,0x4a,0x10,0x19] + vpmovuswb %zmm27, (%rcx) {%k2} + +// CHECK: vpmovuswb %zmm27, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x10,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpmovuswb %zmm27, 291(%rax,%r14,8) + +// CHECK: vpmovuswb %zmm27, 4064(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x5a,0x7f] + vpmovuswb %zmm27, 4064(%rdx) + +// CHECK: vpmovuswb %zmm27, 4096(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x9a,0x00,0x10,0x00,0x00] + vpmovuswb %zmm27, 4096(%rdx) + +// CHECK: vpmovuswb %zmm27, -4096(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x5a,0x80] + vpmovuswb %zmm27, -4096(%rdx) + +// CHECK: vpmovuswb %zmm27, -4128(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x9a,0xe0,0xef,0xff,0xff] + vpmovuswb %zmm27, -4128(%rdx) + // CHECK: vpmulhuw %zmm21, %zmm24, %zmm21 // CHECK: encoding: [0x62,0xa1,0x3d,0x40,0xe4,0xed] vpmulhuw %zmm21, %zmm24, %zmm21 diff --git a/llvm/test/MC/X86/x86-64-avx512bw_vl.s b/llvm/test/MC/X86/x86-64-avx512bw_vl.s index dcc9db577eb..24b8a56efe6 100644 --- a/llvm/test/MC/X86/x86-64-avx512bw_vl.s +++ b/llvm/test/MC/X86/x86-64-avx512bw_vl.s @@ -6583,6 +6583,486 @@ // CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x9a,0xe0,0xef,0xff,0xff] vpshufb -4128(%rdx), %ymm18, %ymm19 +// CHECK: vpmovwb %xmm28, %xmm27 +// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x30,0xe3] + vpmovwb %xmm28, %xmm27 + +// CHECK: vpmovwb %xmm28, %xmm27 {%k2} +// CHECK: encoding: [0x62,0x02,0x7e,0x0a,0x30,0xe3] + vpmovwb %xmm28, %xmm27 {%k2} + +// CHECK: vpmovwb %xmm28, %xmm27 {%k2} {z} +// CHECK: encoding: [0x62,0x02,0x7e,0x8a,0x30,0xe3] + vpmovwb %xmm28, %xmm27 {%k2} {z} + +// CHECK: vpmovwb %ymm26, %xmm26 +// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x30,0xd2] + vpmovwb %ymm26, %xmm26 + +// CHECK: vpmovwb %ymm26, %xmm26 {%k4} +// CHECK: encoding: [0x62,0x02,0x7e,0x2c,0x30,0xd2] + vpmovwb %ymm26, %xmm26 {%k4} + +// CHECK: vpmovwb %ymm26, %xmm26 {%k4} {z} +// CHECK: encoding: [0x62,0x02,0x7e,0xac,0x30,0xd2] + vpmovwb %ymm26, %xmm26 {%k4} {z} + +// CHECK: vpmovwb %xmm23, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x39] + vpmovwb %xmm23,(%rcx) + +// CHECK: vpmovwb %xmm23, (%rcx) {%k6} +// CHECK: encoding: [0x62,0xe2,0x7e,0x0e,0x30,0x39] + vpmovwb %xmm23,(%rcx) {%k6} + +// CHECK: vpmovwb %xmm23, 4660(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x30,0xbc,0xf0,0x34,0x12,0x00,0x00] + vpmovwb %xmm23,4660(%rax,%r14,8) + +// CHECK: vpmovwb %xmm23, 1016(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x7a,0x7f] + vpmovwb %xmm23, 1016(%rdx) + +// CHECK: vpmovwb %xmm23, 1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xba,0x00,0x04,0x00,0x00] + vpmovwb %xmm23, 1024(%rdx) + +// CHECK: vpmovwb %xmm23, -1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x7a,0x80] + vpmovwb %xmm23,-1024(%rdx) + +// CHECK: vpmovwb %xmm23, -1032(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xba,0xf8,0xfb,0xff,0xff] + vpmovwb %xmm23,-1032(%rdx) + +// CHECK: vpmovwb %ymm21, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x29] + vpmovwb %ymm21,(%rcx) + +// CHECK: vpmovwb %ymm21, (%rcx) {%k5} +// CHECK: encoding: [0x62,0xe2,0x7e,0x2d,0x30,0x29] + vpmovwb %ymm21,(%rcx) {%k5} + +// CHECK: vpmovwb %ymm21, 4660(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x30,0xac,0xf0,0x34,0x12,0x00,0x00] + vpmovwb %ymm21, 4660(%rax,%r14,8) + +// CHECK: vpmovwb %ymm21, 2032(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x6a,0x7f] + vpmovwb %ymm21, 2032(%rdx) + +// CHECK: vpmovwb %ymm21, 2048(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xaa,0x00,0x08,0x00,0x00] + vpmovwb %ymm21, 2048(%rdx) + +// CHECK: vpmovwb %ymm21, -2048(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x6a,0x80] + vpmovwb %ymm21,-2048(%rdx) + +// CHECK: vpmovwb %ymm21, -2064(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xaa,0xf0,0xf7,0xff,0xff] + vpmovwb %ymm21, -2064(%rdx) + +// CHECK: vpmovswb %xmm19, %xmm17 +// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x20,0xd9] + vpmovswb %xmm19, %xmm17 + +// CHECK: vpmovswb %xmm19, %xmm17 {%k1} +// CHECK: encoding: [0x62,0xa2,0x7e,0x09,0x20,0xd9] + vpmovswb %xmm19, %xmm17 {%k1} + +// CHECK: vpmovswb %xmm19, %xmm17 {%k1} {z} +// CHECK: encoding: [0x62,0xa2,0x7e,0x89,0x20,0xd9] + vpmovswb %xmm19, %xmm17 {%k1} {z} + +// CHECK: vpmovswb %ymm19, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x20,0xdd] + vpmovswb %ymm19, %xmm21 + +// CHECK: vpmovswb %ymm19, %xmm21 {%k4} +// CHECK: encoding: [0x62,0xa2,0x7e,0x2c,0x20,0xdd] + vpmovswb %ymm19, %xmm21 {%k4} + +// CHECK: vpmovswb %ymm19, %xmm21 {%k4} {z} +// CHECK: encoding: [0x62,0xa2,0x7e,0xac,0x20,0xdd] + vpmovswb %ymm19, %xmm21 {%k4} {z} + +// CHECK: vpmovswb %xmm18, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x11] + vpmovswb %xmm18,(%rcx) + +// CHECK: vpmovswb %xmm18, (%rcx) {%k2} +// CHECK: encoding: [0x62,0xe2,0x7e,0x0a,0x20,0x11] + vpmovswb %xmm18,(%rcx) {%k2} + +// CHECK: vpmovswb %xmm18, 4660(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x20,0x94,0xf0,0x34,0x12,0x00,0x00] + vpmovswb %xmm18, 4660(%rax,%r14,8) + +// CHECK: vpmovswb %xmm18, 1016(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x52,0x7f] + vpmovswb %xmm18, 1016(%rdx) + +// CHECK: vpmovswb %xmm18, 1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x92,0x00,0x04,0x00,0x00] + vpmovswb %xmm18, 1024(%rdx) + +// CHECK: vpmovswb %xmm18, -1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x52,0x80] + vpmovswb %xmm18, -1024(%rdx) + +// CHECK: vpmovswb %xmm18, -1032(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x92,0xf8,0xfb,0xff,0xff] + vpmovswb %xmm18, -1032(%rdx) + +// CHECK: vpmovswb %ymm23, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0x39] + vpmovswb %ymm23,(%rcx) + +// CHECK: vpmovswb %ymm23, (%rcx) {%k2} +// CHECK: encoding: [0x62,0xe2,0x7e,0x2a,0x20,0x39] + vpmovswb %ymm23,(%rcx) {%k2} + +// CHECK: vpmovswb %ymm23, 4660(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x20,0xbc,0xf0,0x34,0x12,0x00,0x00] + vpmovswb %ymm23, 4660(%rax,%r14,8) + +// CHECK: vpmovswb %ymm23, 2032(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0x7a,0x7f] + vpmovswb %ymm23, 2032(%rdx) + +// CHECK: vpmovswb %ymm23, 2048(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0xba,0x00,0x08,0x00,0x00] + vpmovswb %ymm23, 2048(%rdx) + +// CHECK: vpmovswb %ymm23, -2048(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0x7a,0x80] + vpmovswb %ymm23, -2048(%rdx) + +// CHECK: vpmovswb %ymm23, -2064(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0xba,0xf0,0xf7,0xff,0xff] + vpmovswb %ymm23, -2064(%rdx) + +// CHECK: vpmovuswb %xmm17, %xmm26 +// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x10,0xca] + vpmovuswb %xmm17, %xmm26 + +// CHECK: vpmovuswb %xmm17, %xmm26 {%k6} +// CHECK: encoding: [0x62,0x82,0x7e,0x0e,0x10,0xca] + vpmovuswb %xmm17, %xmm26 {%k6} + +// CHECK: vpmovuswb %xmm17, %xmm26 {%k6} {z} +// CHECK: encoding: [0x62,0x82,0x7e,0x8e,0x10,0xca] + vpmovuswb %xmm17, %xmm26 {%k6} {z} + +// CHECK: vpmovuswb %ymm26, %xmm17 +// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x10,0xd1] + vpmovuswb %ymm26, %xmm17 + +// CHECK: vpmovuswb %ymm26, %xmm17 {%k2} +// CHECK: encoding: [0x62,0x22,0x7e,0x2a,0x10,0xd1] + vpmovuswb %ymm26, %xmm17 {%k2} + +// CHECK: vpmovuswb %ymm26, %xmm17 {%k2} {z} +// CHECK: encoding: [0x62,0x22,0x7e,0xaa,0x10,0xd1] + vpmovuswb %ymm26, %xmm17 {%k2} {z} + +// CHECK: vpmovuswb %xmm19, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x19] + vpmovuswb %xmm19,(%rcx) + +// CHECK: vpmovuswb %xmm19, (%rcx) {%k1} +// CHECK: encoding: [0x62,0xe2,0x7e,0x09,0x10,0x19] + vpmovuswb %xmm19,(%rcx) {%k1} + +// CHECK: vpmovuswb %xmm19, 4660(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x10,0x9c,0xf0,0x34,0x12,0x00,0x00] + vpmovuswb %xmm19, 4660(%rax,%r14,8) + +// CHECK: vpmovuswb %xmm19, 1016(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x5a,0x7f] + vpmovuswb %xmm19, 1016(%rdx) + +// CHECK: vpmovuswb %xmm19, 1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x9a,0x00,0x04,0x00,0x00] + vpmovuswb %xmm19, 1024(%rdx) + +// CHECK: vpmovuswb %xmm19, -1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x5a,0x80] + vpmovuswb %xmm19, -1024(%rdx) + +// CHECK: vpmovuswb %xmm19, -1032(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x9a,0xf8,0xfb,0xff,0xff] + vpmovuswb %xmm19, -1032(%rdx) + +// CHECK: vpmovuswb %ymm23, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0x39] + vpmovuswb %ymm23,(%rcx) + +// CHECK: vpmovuswb %ymm23, (%rcx) {%k6} +// CHECK: encoding: [0x62,0xe2,0x7e,0x2e,0x10,0x39] + vpmovuswb %ymm23,(%rcx) {%k6} + +// CHECK: vpmovuswb %ymm23, 4660(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x10,0xbc,0xf0,0x34,0x12,0x00,0x00] + vpmovuswb %ymm23, 4660(%rax,%r14,8) + +// CHECK: vpmovuswb %ymm23, 2032(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0x7a,0x7f] + vpmovuswb %ymm23, 2032(%rdx) + +// CHECK: vpmovuswb %ymm23, 2048(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0xba,0x00,0x08,0x00,0x00] + vpmovuswb %ymm23, 2048(%rdx) + +// CHECK: vpmovuswb %ymm23, -2048(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0x7a,0x80] + vpmovuswb %ymm23, -2048(%rdx) + +// CHECK: vpmovuswb %ymm23, -2064(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0xba,0xf0,0xf7,0xff,0xff] + vpmovuswb %ymm23, -2064(%rdx) + +// CHECK: vpmovwb %xmm17, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x30,0xcd] + vpmovwb %xmm17, %xmm21 + +// CHECK: vpmovwb %xmm17, %xmm21 {%k1} +// CHECK: encoding: [0x62,0xa2,0x7e,0x09,0x30,0xcd] + vpmovwb %xmm17, %xmm21 {%k1} + +// CHECK: vpmovwb %xmm17, %xmm21 {%k1} {z} +// CHECK: encoding: [0x62,0xa2,0x7e,0x89,0x30,0xcd] + vpmovwb %xmm17, %xmm21 {%k1} {z} + +// CHECK: vpmovwb %ymm23, %xmm26 +// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x30,0xfa] + vpmovwb %ymm23, %xmm26 + +// CHECK: vpmovwb %ymm23, %xmm26 {%k7} +// CHECK: encoding: [0x62,0x82,0x7e,0x2f,0x30,0xfa] + vpmovwb %ymm23, %xmm26 {%k7} + +// CHECK: vpmovwb %ymm23, %xmm26 {%k7} {z} +// CHECK: encoding: [0x62,0x82,0x7e,0xaf,0x30,0xfa] + vpmovwb %ymm23, %xmm26 {%k7} {z} + +// CHECK: vpmovwb %xmm21, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x29] + vpmovwb %xmm21, (%rcx) + +// CHECK: vpmovwb %xmm21, (%rcx) {%k2} +// CHECK: encoding: [0x62,0xe2,0x7e,0x0a,0x30,0x29] + vpmovwb %xmm21, (%rcx) {%k2} + +// CHECK: vpmovwb %xmm21, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x30,0xac,0xf0,0x23,0x01,0x00,0x00] + vpmovwb %xmm21, 291(%rax,%r14,8) + +// CHECK: vpmovwb %xmm21, 1016(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x6a,0x7f] + vpmovwb %xmm21, 1016(%rdx) + +// CHECK: vpmovwb %xmm21, 1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xaa,0x00,0x04,0x00,0x00] + vpmovwb %xmm21, 1024(%rdx) + +// CHECK: vpmovwb %xmm21, -1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x6a,0x80] + vpmovwb %xmm21, -1024(%rdx) + +// CHECK: vpmovwb %xmm21, -1032(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xaa,0xf8,0xfb,0xff,0xff] + vpmovwb %xmm21, -1032(%rdx) + +// CHECK: vpmovwb %ymm20, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x21] + vpmovwb %ymm20, (%rcx) + +// CHECK: vpmovwb %ymm20, (%rcx) {%k4} +// CHECK: encoding: [0x62,0xe2,0x7e,0x2c,0x30,0x21] + vpmovwb %ymm20, (%rcx) {%k4} + +// CHECK: vpmovwb %ymm20, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x30,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpmovwb %ymm20, 291(%rax,%r14,8) + +// CHECK: vpmovwb %ymm20, 2032(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x62,0x7f] + vpmovwb %ymm20, 2032(%rdx) + +// CHECK: vpmovwb %ymm20, 2048(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xa2,0x00,0x08,0x00,0x00] + vpmovwb %ymm20, 2048(%rdx) + +// CHECK: vpmovwb %ymm20, -2048(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x62,0x80] + vpmovwb %ymm20, -2048(%rdx) + +// CHECK: vpmovwb %ymm20, -2064(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xa2,0xf0,0xf7,0xff,0xff] + vpmovwb %ymm20, -2064(%rdx) + +// CHECK: vpmovswb %xmm20, %xmm24 +// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x20,0xe0] + vpmovswb %xmm20, %xmm24 + +// CHECK: vpmovswb %xmm20, %xmm24 {%k4} +// CHECK: encoding: [0x62,0x82,0x7e,0x0c,0x20,0xe0] + vpmovswb %xmm20, %xmm24 {%k4} + +// CHECK: vpmovswb %xmm20, %xmm24 {%k4} {z} +// CHECK: encoding: [0x62,0x82,0x7e,0x8c,0x20,0xe0] + vpmovswb %xmm20, %xmm24 {%k4} {z} + +// CHECK: vpmovswb %ymm18, %xmm27 +// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x20,0xd3] + vpmovswb %ymm18, %xmm27 + +// CHECK: vpmovswb %ymm18, %xmm27 {%k1} +// CHECK: encoding: [0x62,0x82,0x7e,0x29,0x20,0xd3] + vpmovswb %ymm18, %xmm27 {%k1} + +// CHECK: vpmovswb %ymm18, %xmm27 {%k1} {z} +// CHECK: encoding: [0x62,0x82,0x7e,0xa9,0x20,0xd3] + vpmovswb %ymm18, %xmm27 {%k1} {z} + +// CHECK: vpmovswb %xmm24, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x01] + vpmovswb %xmm24, (%rcx) + +// CHECK: vpmovswb %xmm24, (%rcx) {%k3} +// CHECK: encoding: [0x62,0x62,0x7e,0x0b,0x20,0x01] + vpmovswb %xmm24, (%rcx) {%k3} + +// CHECK: vpmovswb %xmm24, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x20,0x84,0xf0,0x23,0x01,0x00,0x00] + vpmovswb %xmm24, 291(%rax,%r14,8) + +// CHECK: vpmovswb %xmm24, 1016(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x42,0x7f] + vpmovswb %xmm24, 1016(%rdx) + +// CHECK: vpmovswb %xmm24, 1024(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x82,0x00,0x04,0x00,0x00] + vpmovswb %xmm24, 1024(%rdx) + +// CHECK: vpmovswb %xmm24, -1024(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x42,0x80] + vpmovswb %xmm24, -1024(%rdx) + +// CHECK: vpmovswb %xmm24, -1032(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x82,0xf8,0xfb,0xff,0xff] + vpmovswb %xmm24, -1032(%rdx) + +// CHECK: vpmovswb %ymm27, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x19] + vpmovswb %ymm27, (%rcx) + +// CHECK: vpmovswb %ymm27, (%rcx) {%k7} +// CHECK: encoding: [0x62,0x62,0x7e,0x2f,0x20,0x19] + vpmovswb %ymm27, (%rcx) {%k7} + +// CHECK: vpmovswb %ymm27, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x20,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpmovswb %ymm27, 291(%rax,%r14,8) + +// CHECK: vpmovswb %ymm27, 2032(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x5a,0x7f] + vpmovswb %ymm27, 2032(%rdx) + +// CHECK: vpmovswb %ymm27, 2048(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x9a,0x00,0x08,0x00,0x00] + vpmovswb %ymm27, 2048(%rdx) + +// CHECK: vpmovswb %ymm27, -2048(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x5a,0x80] + vpmovswb %ymm27, -2048(%rdx) + +// CHECK: vpmovswb %ymm27, -2064(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x9a,0xf0,0xf7,0xff,0xff] + vpmovswb %ymm27, -2064(%rdx) + +// CHECK: vpmovuswb %xmm19, %xmm23 +// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x10,0xdf] + vpmovuswb %xmm19, %xmm23 + +// CHECK: vpmovuswb %xmm19, %xmm23 {%k4} +// CHECK: encoding: [0x62,0xa2,0x7e,0x0c,0x10,0xdf] + vpmovuswb %xmm19, %xmm23 {%k4} + +// CHECK: vpmovuswb %xmm19, %xmm23 {%k4} {z} +// CHECK: encoding: [0x62,0xa2,0x7e,0x8c,0x10,0xdf] + vpmovuswb %xmm19, %xmm23 {%k4} {z} + +// CHECK: vpmovuswb %ymm23, %xmm28 +// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x10,0xfc] + vpmovuswb %ymm23, %xmm28 + +// CHECK: vpmovuswb %ymm23, %xmm28 {%k6} +// CHECK: encoding: [0x62,0x82,0x7e,0x2e,0x10,0xfc] + vpmovuswb %ymm23, %xmm28 {%k6} + +// CHECK: vpmovuswb %ymm23, %xmm28 {%k6} {z} +// CHECK: encoding: [0x62,0x82,0x7e,0xae,0x10,0xfc] + vpmovuswb %ymm23, %xmm28 {%k6} {z} + +// CHECK: vpmovuswb %xmm25, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x09] + vpmovuswb %xmm25, (%rcx) + +// CHECK: vpmovuswb %xmm25, (%rcx) {%k3} +// CHECK: encoding: [0x62,0x62,0x7e,0x0b,0x10,0x09] + vpmovuswb %xmm25, (%rcx) {%k3} + +// CHECK: vpmovuswb %xmm25, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x10,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpmovuswb %xmm25, 291(%rax,%r14,8) + +// CHECK: vpmovuswb %xmm25, 1016(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x4a,0x7f] + vpmovuswb %xmm25, 1016(%rdx) + +// CHECK: vpmovuswb %xmm25, 1024(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x8a,0x00,0x04,0x00,0x00] + vpmovuswb %xmm25, 1024(%rdx) + +// CHECK: vpmovuswb %xmm25, -1024(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x4a,0x80] + vpmovuswb %xmm25, -1024(%rdx) + +// CHECK: vpmovuswb %xmm25, -1032(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x8a,0xf8,0xfb,0xff,0xff] + vpmovuswb %xmm25, -1032(%rdx) + +// CHECK: vpmovuswb %ymm28, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0x21] + vpmovuswb %ymm28, (%rcx) + +// CHECK: vpmovuswb %ymm28, (%rcx) {%k2} +// CHECK: encoding: [0x62,0x62,0x7e,0x2a,0x10,0x21] + vpmovuswb %ymm28, (%rcx) {%k2} + +// CHECK: vpmovuswb %ymm28, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x10,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpmovuswb %ymm28, 291(%rax,%r14,8) + +// CHECK: vpmovuswb %ymm28, 2032(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0x62,0x7f] + vpmovuswb %ymm28, 2032(%rdx) + +// CHECK: vpmovuswb %ymm28, 2048(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0xa2,0x00,0x08,0x00,0x00] + vpmovuswb %ymm28, 2048(%rdx) + +// CHECK: vpmovuswb %ymm28, -2048(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0x62,0x80] + vpmovuswb %ymm28, -2048(%rdx) + +// CHECK: vpmovuswb %ymm28, -2064(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0xa2,0xf0,0xf7,0xff,0xff] + vpmovuswb %ymm28, -2064(%rdx) + // CHECK: vpmulhuw %xmm18, %xmm21, %xmm24 // CHECK: encoding: [0x62,0x21,0x55,0x00,0xe4,0xc2] vpmulhuw %xmm18, %xmm21, %xmm24 diff --git a/llvm/test/MC/X86/x86-64-avx512f_vl.s b/llvm/test/MC/X86/x86-64-avx512f_vl.s index eca2ffbfc09..9280be94716 100644 --- a/llvm/test/MC/X86/x86-64-avx512f_vl.s +++ b/llvm/test/MC/X86/x86-64-avx512f_vl.s @@ -16285,6 +16285,1206 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1 // CHECK: encoding: [0x62,0x62,0x4d,0x30,0x2c,0x8a,0xfc,0xfd,0xff,0xff] vscalefps -516(%rdx){1to8}, %ymm22, %ymm25 +// CHECK: vpmovqb %xmm29, %xmm24 +// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x32,0xe8] + vpmovqb %xmm29, %xmm24 + +// CHECK: vpmovqb %xmm29, %xmm24 {%k4} +// CHECK: encoding: [0x62,0x02,0x7e,0x0c,0x32,0xe8] + vpmovqb %xmm29, %xmm24 {%k4} + +// CHECK: vpmovqb %xmm29, %xmm24 {%k4} {z} +// CHECK: encoding: [0x62,0x02,0x7e,0x8c,0x32,0xe8] + vpmovqb %xmm29, %xmm24 {%k4} {z} + +// CHECK: vpmovqb %ymm29, %xmm17 +// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x32,0xe9] + vpmovqb %ymm29, %xmm17 + +// CHECK: vpmovqb %ymm29, %xmm17 {%k3} +// CHECK: encoding: [0x62,0x22,0x7e,0x2b,0x32,0xe9] + vpmovqb %ymm29, %xmm17 {%k3} + +// CHECK: vpmovqb %ymm29, %xmm17 {%k3} {z} +// CHECK: encoding: [0x62,0x22,0x7e,0xab,0x32,0xe9] + vpmovqb %ymm29, %xmm17 {%k3} {z} + +// CHECK: vpmovqb %xmm27, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x19] + vpmovqb %xmm27, (%rcx) + +// CHECK: vpmovqb %xmm27, (%rcx) {%k2} +// CHECK: encoding: [0x62,0x62,0x7e,0x0a,0x32,0x19] + vpmovqb %xmm27, (%rcx) {%k2} + +// CHECK: vpmovqb %xmm27, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x32,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpmovqb %xmm27, 291(%rax,%r14,8) + +// CHECK: vpmovqb %xmm27, 254(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x5a,0x7f] + vpmovqb %xmm27, 254(%rdx) + +// CHECK: vpmovqb %xmm27, 256(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x9a,0x00,0x01,0x00,0x00] + vpmovqb %xmm27, 256(%rdx) + +// CHECK: vpmovqb %xmm27, -256(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x5a,0x80] + vpmovqb %xmm27, -256(%rdx) + +// CHECK: vpmovqb %xmm27, -258(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x9a,0xfe,0xfe,0xff,0xff] + vpmovqb %xmm27, -258(%rdx) + +// CHECK: vpmovqb %ymm28, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0x21] + vpmovqb %ymm28, (%rcx) + +// CHECK: vpmovqb %ymm28, (%rcx) {%k7} +// CHECK: encoding: [0x62,0x62,0x7e,0x2f,0x32,0x21] + vpmovqb %ymm28, (%rcx) {%k7} + +// CHECK: vpmovqb %ymm28, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x32,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpmovqb %ymm28, 291(%rax,%r14,8) + +// CHECK: vpmovqb %ymm28, 508(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0x62,0x7f] + vpmovqb %ymm28, 508(%rdx) + +// CHECK: vpmovqb %ymm28, 512(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0xa2,0x00,0x02,0x00,0x00] + vpmovqb %ymm28, 512(%rdx) + +// CHECK: vpmovqb %ymm28, -512(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0x62,0x80] + vpmovqb %ymm28, -512(%rdx) + +// CHECK: vpmovqb %ymm28, -516(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0xa2,0xfc,0xfd,0xff,0xff] + vpmovqb %ymm28, -516(%rdx) + +// CHECK: vpmovsqb %xmm19, %xmm26 +// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x22,0xda] + vpmovsqb %xmm19, %xmm26 + +// CHECK: vpmovsqb %xmm19, %xmm26 {%k1} +// CHECK: encoding: [0x62,0x82,0x7e,0x09,0x22,0xda] + vpmovsqb %xmm19, %xmm26 {%k1} + +// CHECK: vpmovsqb %xmm19, %xmm26 {%k1} {z} +// CHECK: encoding: [0x62,0x82,0x7e,0x89,0x22,0xda] + vpmovsqb %xmm19, %xmm26 {%k1} {z} + +// CHECK: vpmovsqb %ymm20, %xmm20 +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x22,0xe4] + vpmovsqb %ymm20, %xmm20 + +// CHECK: vpmovsqb %ymm20, %xmm20 {%k6} +// CHECK: encoding: [0x62,0xa2,0x7e,0x2e,0x22,0xe4] + vpmovsqb %ymm20, %xmm20 {%k6} + +// CHECK: vpmovsqb %ymm20, %xmm20 {%k6} {z} +// CHECK: encoding: [0x62,0xa2,0x7e,0xae,0x22,0xe4] + vpmovsqb %ymm20, %xmm20 {%k6} {z} + +// CHECK: vpmovsqb %xmm25, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x09] + vpmovsqb %xmm25, (%rcx) + +// CHECK: vpmovsqb %xmm25, (%rcx) {%k7} +// CHECK: encoding: [0x62,0x62,0x7e,0x0f,0x22,0x09] + vpmovsqb %xmm25, (%rcx) {%k7} + +// CHECK: vpmovsqb %xmm25, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x22,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpmovsqb %xmm25, 291(%rax,%r14,8) + +// CHECK: vpmovsqb %xmm25, 254(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x4a,0x7f] + vpmovsqb %xmm25, 254(%rdx) + +// CHECK: vpmovsqb %xmm25, 256(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x8a,0x00,0x01,0x00,0x00] + vpmovsqb %xmm25, 256(%rdx) + +// CHECK: vpmovsqb %xmm25, -256(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x4a,0x80] + vpmovsqb %xmm25, -256(%rdx) + +// CHECK: vpmovsqb %xmm25, -258(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x8a,0xfe,0xfe,0xff,0xff] + vpmovsqb %xmm25, -258(%rdx) + +// CHECK: vpmovsqb %ymm17, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x09] + vpmovsqb %ymm17, (%rcx) + +// CHECK: vpmovsqb %ymm17, (%rcx) {%k4} +// CHECK: encoding: [0x62,0xe2,0x7e,0x2c,0x22,0x09] + vpmovsqb %ymm17, (%rcx) {%k4} + +// CHECK: vpmovsqb %ymm17, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x22,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpmovsqb %ymm17, 291(%rax,%r14,8) + +// CHECK: vpmovsqb %ymm17, 508(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x4a,0x7f] + vpmovsqb %ymm17, 508(%rdx) + +// CHECK: vpmovsqb %ymm17, 512(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x8a,0x00,0x02,0x00,0x00] + vpmovsqb %ymm17, 512(%rdx) + +// CHECK: vpmovsqb %ymm17, -512(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x4a,0x80] + vpmovsqb %ymm17, -512(%rdx) + +// CHECK: vpmovsqb %ymm17, -516(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x8a,0xfc,0xfd,0xff,0xff] + vpmovsqb %ymm17, -516(%rdx) + +// CHECK: vpmovusqb %xmm22, %xmm28 +// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x12,0xf4] + vpmovusqb %xmm22, %xmm28 + +// CHECK: vpmovusqb %xmm22, %xmm28 {%k2} +// CHECK: encoding: [0x62,0x82,0x7e,0x0a,0x12,0xf4] + vpmovusqb %xmm22, %xmm28 {%k2} + +// CHECK: vpmovusqb %xmm22, %xmm28 {%k2} {z} +// CHECK: encoding: [0x62,0x82,0x7e,0x8a,0x12,0xf4] + vpmovusqb %xmm22, %xmm28 {%k2} {z} + +// CHECK: vpmovusqb %ymm23, %xmm22 +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x12,0xfe] + vpmovusqb %ymm23, %xmm22 + +// CHECK: vpmovusqb %ymm23, %xmm22 {%k7} +// CHECK: encoding: [0x62,0xa2,0x7e,0x2f,0x12,0xfe] + vpmovusqb %ymm23, %xmm22 {%k7} + +// CHECK: vpmovusqb %ymm23, %xmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa2,0x7e,0xaf,0x12,0xfe] + vpmovusqb %ymm23, %xmm22 {%k7} {z} + +// CHECK: vpmovusqb %xmm26, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x11] + vpmovusqb %xmm26, (%rcx) + +// CHECK: vpmovusqb %xmm26, (%rcx) {%k5} +// CHECK: encoding: [0x62,0x62,0x7e,0x0d,0x12,0x11] + vpmovusqb %xmm26, (%rcx) {%k5} + +// CHECK: vpmovusqb %xmm26, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x12,0x94,0xf0,0x23,0x01,0x00,0x00] + vpmovusqb %xmm26, 291(%rax,%r14,8) + +// CHECK: vpmovusqb %xmm26, 254(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x52,0x7f] + vpmovusqb %xmm26, 254(%rdx) + +// CHECK: vpmovusqb %xmm26, 256(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x92,0x00,0x01,0x00,0x00] + vpmovusqb %xmm26, 256(%rdx) + +// CHECK: vpmovusqb %xmm26, -256(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x52,0x80] + vpmovusqb %xmm26, -256(%rdx) + +// CHECK: vpmovusqb %xmm26, -258(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x92,0xfe,0xfe,0xff,0xff] + vpmovusqb %xmm26, -258(%rdx) + +// CHECK: vpmovusqb %ymm30, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0x31] + vpmovusqb %ymm30, (%rcx) + +// CHECK: vpmovusqb %ymm30, (%rcx) {%k2} +// CHECK: encoding: [0x62,0x62,0x7e,0x2a,0x12,0x31] + vpmovusqb %ymm30, (%rcx) {%k2} + +// CHECK: vpmovusqb %ymm30, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x12,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpmovusqb %ymm30, 291(%rax,%r14,8) + +// CHECK: vpmovusqb %ymm30, 508(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0x72,0x7f] + vpmovusqb %ymm30, 508(%rdx) + +// CHECK: vpmovusqb %ymm30, 512(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0xb2,0x00,0x02,0x00,0x00] + vpmovusqb %ymm30, 512(%rdx) + +// CHECK: vpmovusqb %ymm30, -512(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0x72,0x80] + vpmovusqb %ymm30, -512(%rdx) + +// CHECK: vpmovusqb %ymm30, -516(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0xb2,0xfc,0xfd,0xff,0xff] + vpmovusqb %ymm30, -516(%rdx) + +// CHECK: vpmovqw %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x34,0xd3] + vpmovqw %xmm18, %xmm19 + +// CHECK: vpmovqw %xmm18, %xmm19 {%k4} +// CHECK: encoding: [0x62,0xa2,0x7e,0x0c,0x34,0xd3] + vpmovqw %xmm18, %xmm19 {%k4} + +// CHECK: vpmovqw %xmm18, %xmm19 {%k4} {z} +// CHECK: encoding: [0x62,0xa2,0x7e,0x8c,0x34,0xd3] + vpmovqw %xmm18, %xmm19 {%k4} {z} + +// CHECK: vpmovqw %ymm22, %xmm19 +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x34,0xf3] + vpmovqw %ymm22, %xmm19 + +// CHECK: vpmovqw %ymm22, %xmm19 {%k5} +// CHECK: encoding: [0x62,0xa2,0x7e,0x2d,0x34,0xf3] + vpmovqw %ymm22, %xmm19 {%k5} + +// CHECK: vpmovqw %ymm22, %xmm19 {%k5} {z} +// CHECK: encoding: [0x62,0xa2,0x7e,0xad,0x34,0xf3] + vpmovqw %ymm22, %xmm19 {%k5} {z} + +// CHECK: vpmovqw %xmm21, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0x29] + vpmovqw %xmm21, (%rcx) + +// CHECK: vpmovqw %xmm21, (%rcx) {%k3} +// CHECK: encoding: [0x62,0xe2,0x7e,0x0b,0x34,0x29] + vpmovqw %xmm21, (%rcx) {%k3} + +// CHECK: vpmovqw %xmm21, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x34,0xac,0xf0,0x23,0x01,0x00,0x00] + vpmovqw %xmm21, 291(%rax,%r14,8) + +// CHECK: vpmovqw %xmm21, 508(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0x6a,0x7f] + vpmovqw %xmm21, 508(%rdx) + +// CHECK: vpmovqw %xmm21, 512(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0xaa,0x00,0x02,0x00,0x00] + vpmovqw %xmm21, 512(%rdx) + +// CHECK: vpmovqw %xmm21, -512(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0x6a,0x80] + vpmovqw %xmm21, -512(%rdx) + +// CHECK: vpmovqw %xmm21, -516(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0xaa,0xfc,0xfd,0xff,0xff] + vpmovqw %xmm21, -516(%rdx) + +// CHECK: vpmovqw %ymm28, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0x21] + vpmovqw %ymm28, (%rcx) + +// CHECK: vpmovqw %ymm28, (%rcx) {%k6} +// CHECK: encoding: [0x62,0x62,0x7e,0x2e,0x34,0x21] + vpmovqw %ymm28, (%rcx) {%k6} + +// CHECK: vpmovqw %ymm28, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x34,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpmovqw %ymm28, 291(%rax,%r14,8) + +// CHECK: vpmovqw %ymm28, 1016(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0x62,0x7f] + vpmovqw %ymm28, 1016(%rdx) + +// CHECK: vpmovqw %ymm28, 1024(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0xa2,0x00,0x04,0x00,0x00] + vpmovqw %ymm28, 1024(%rdx) + +// CHECK: vpmovqw %ymm28, -1024(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0x62,0x80] + vpmovqw %ymm28, -1024(%rdx) + +// CHECK: vpmovqw %ymm28, -1032(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0xa2,0xf8,0xfb,0xff,0xff] + vpmovqw %ymm28, -1032(%rdx) + +// CHECK: vpmovsqw %xmm18, %xmm26 +// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x24,0xd2] + vpmovsqw %xmm18, %xmm26 + +// CHECK: vpmovsqw %xmm18, %xmm26 {%k7} +// CHECK: encoding: [0x62,0x82,0x7e,0x0f,0x24,0xd2] + vpmovsqw %xmm18, %xmm26 {%k7} + +// CHECK: vpmovsqw %xmm18, %xmm26 {%k7} {z} +// CHECK: encoding: [0x62,0x82,0x7e,0x8f,0x24,0xd2] + vpmovsqw %xmm18, %xmm26 {%k7} {z} + +// CHECK: vpmovsqw %ymm20, %xmm28 +// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x24,0xe4] + vpmovsqw %ymm20, %xmm28 + +// CHECK: vpmovsqw %ymm20, %xmm28 {%k4} +// CHECK: encoding: [0x62,0x82,0x7e,0x2c,0x24,0xe4] + vpmovsqw %ymm20, %xmm28 {%k4} + +// CHECK: vpmovsqw %ymm20, %xmm28 {%k4} {z} +// CHECK: encoding: [0x62,0x82,0x7e,0xac,0x24,0xe4] + vpmovsqw %ymm20, %xmm28 {%k4} {z} + +// CHECK: vpmovsqw %xmm30, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0x31] + vpmovsqw %xmm30, (%rcx) + +// CHECK: vpmovsqw %xmm30, (%rcx) {%k4} +// CHECK: encoding: [0x62,0x62,0x7e,0x0c,0x24,0x31] + vpmovsqw %xmm30, (%rcx) {%k4} + +// CHECK: vpmovsqw %xmm30, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x24,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpmovsqw %xmm30, 291(%rax,%r14,8) + +// CHECK: vpmovsqw %xmm30, 508(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0x72,0x7f] + vpmovsqw %xmm30, 508(%rdx) + +// CHECK: vpmovsqw %xmm30, 512(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0xb2,0x00,0x02,0x00,0x00] + vpmovsqw %xmm30, 512(%rdx) + +// CHECK: vpmovsqw %xmm30, -512(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0x72,0x80] + vpmovsqw %xmm30, -512(%rdx) + +// CHECK: vpmovsqw %xmm30, -516(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0xb2,0xfc,0xfd,0xff,0xff] + vpmovsqw %xmm30, -516(%rdx) + +// CHECK: vpmovsqw %ymm21, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0x29] + vpmovsqw %ymm21, (%rcx) + +// CHECK: vpmovsqw %ymm21, (%rcx) {%k5} +// CHECK: encoding: [0x62,0xe2,0x7e,0x2d,0x24,0x29] + vpmovsqw %ymm21, (%rcx) {%k5} + +// CHECK: vpmovsqw %ymm21, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x24,0xac,0xf0,0x23,0x01,0x00,0x00] + vpmovsqw %ymm21, 291(%rax,%r14,8) + +// CHECK: vpmovsqw %ymm21, 1016(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0x6a,0x7f] + vpmovsqw %ymm21, 1016(%rdx) + +// CHECK: vpmovsqw %ymm21, 1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0xaa,0x00,0x04,0x00,0x00] + vpmovsqw %ymm21, 1024(%rdx) + +// CHECK: vpmovsqw %ymm21, -1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0x6a,0x80] + vpmovsqw %ymm21, -1024(%rdx) + +// CHECK: vpmovsqw %ymm21, -1032(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0xaa,0xf8,0xfb,0xff,0xff] + vpmovsqw %ymm21, -1032(%rdx) + +// CHECK: vpmovusqw %xmm20, %xmm29 +// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x14,0xe5] + vpmovusqw %xmm20, %xmm29 + +// CHECK: vpmovusqw %xmm20, %xmm29 {%k1} +// CHECK: encoding: [0x62,0x82,0x7e,0x09,0x14,0xe5] + vpmovusqw %xmm20, %xmm29 {%k1} + +// CHECK: vpmovusqw %xmm20, %xmm29 {%k1} {z} +// CHECK: encoding: [0x62,0x82,0x7e,0x89,0x14,0xe5] + vpmovusqw %xmm20, %xmm29 {%k1} {z} + +// CHECK: vpmovusqw %ymm21, %xmm20 +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x14,0xec] + vpmovusqw %ymm21, %xmm20 + +// CHECK: vpmovusqw %ymm21, %xmm20 {%k5} +// CHECK: encoding: [0x62,0xa2,0x7e,0x2d,0x14,0xec] + vpmovusqw %ymm21, %xmm20 {%k5} + +// CHECK: vpmovusqw %ymm21, %xmm20 {%k5} {z} +// CHECK: encoding: [0x62,0xa2,0x7e,0xad,0x14,0xec] + vpmovusqw %ymm21, %xmm20 {%k5} {z} + +// CHECK: vpmovusqw %xmm18, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x11] + vpmovusqw %xmm18, (%rcx) + +// CHECK: vpmovusqw %xmm18, (%rcx) {%k1} +// CHECK: encoding: [0x62,0xe2,0x7e,0x09,0x14,0x11] + vpmovusqw %xmm18, (%rcx) {%k1} + +// CHECK: vpmovusqw %xmm18, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x14,0x94,0xf0,0x23,0x01,0x00,0x00] + vpmovusqw %xmm18, 291(%rax,%r14,8) + +// CHECK: vpmovusqw %xmm18, 508(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x52,0x7f] + vpmovusqw %xmm18, 508(%rdx) + +// CHECK: vpmovusqw %xmm18, 512(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x92,0x00,0x02,0x00,0x00] + vpmovusqw %xmm18, 512(%rdx) + +// CHECK: vpmovusqw %xmm18, -512(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x52,0x80] + vpmovusqw %xmm18, -512(%rdx) + +// CHECK: vpmovusqw %xmm18, -516(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x92,0xfc,0xfd,0xff,0xff] + vpmovusqw %xmm18, -516(%rdx) + +// CHECK: vpmovusqw %ymm18, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x11] + vpmovusqw %ymm18, (%rcx) + +// CHECK: vpmovusqw %ymm18, (%rcx) {%k2} +// CHECK: encoding: [0x62,0xe2,0x7e,0x2a,0x14,0x11] + vpmovusqw %ymm18, (%rcx) {%k2} + +// CHECK: vpmovusqw %ymm18, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x14,0x94,0xf0,0x23,0x01,0x00,0x00] + vpmovusqw %ymm18, 291(%rax,%r14,8) + +// CHECK: vpmovusqw %ymm18, 1016(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x52,0x7f] + vpmovusqw %ymm18, 1016(%rdx) + +// CHECK: vpmovusqw %ymm18, 1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x92,0x00,0x04,0x00,0x00] + vpmovusqw %ymm18, 1024(%rdx) + +// CHECK: vpmovusqw %ymm18, -1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x52,0x80] + vpmovusqw %ymm18, -1024(%rdx) + +// CHECK: vpmovusqw %ymm18, -1032(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x92,0xf8,0xfb,0xff,0xff] + vpmovusqw %ymm18, -1032(%rdx) + +// CHECK: vpmovqd %xmm25, %xmm21 +// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x35,0xcd] + vpmovqd %xmm25, %xmm21 + +// CHECK: vpmovqd %xmm25, %xmm21 {%k5} +// CHECK: encoding: [0x62,0x22,0x7e,0x0d,0x35,0xcd] + vpmovqd %xmm25, %xmm21 {%k5} + +// CHECK: vpmovqd %xmm25, %xmm21 {%k5} {z} +// CHECK: encoding: [0x62,0x22,0x7e,0x8d,0x35,0xcd] + vpmovqd %xmm25, %xmm21 {%k5} {z} + +// CHECK: vpmovqd %ymm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x35,0xf5] + vpmovqd %ymm22, %xmm21 + +// CHECK: vpmovqd %ymm22, %xmm21 {%k6} +// CHECK: encoding: [0x62,0xa2,0x7e,0x2e,0x35,0xf5] + vpmovqd %ymm22, %xmm21 {%k6} + +// CHECK: vpmovqd %ymm22, %xmm21 {%k6} {z} +// CHECK: encoding: [0x62,0xa2,0x7e,0xae,0x35,0xf5] + vpmovqd %ymm22, %xmm21 {%k6} {z} + +// CHECK: vpmovqd %xmm29, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0x29] + vpmovqd %xmm29, (%rcx) + +// CHECK: vpmovqd %xmm29, (%rcx) {%k6} +// CHECK: encoding: [0x62,0x62,0x7e,0x0e,0x35,0x29] + vpmovqd %xmm29, (%rcx) {%k6} + +// CHECK: vpmovqd %xmm29, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x35,0xac,0xf0,0x23,0x01,0x00,0x00] + vpmovqd %xmm29, 291(%rax,%r14,8) + +// CHECK: vpmovqd %xmm29, 1016(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0x6a,0x7f] + vpmovqd %xmm29, 1016(%rdx) + +// CHECK: vpmovqd %xmm29, 1024(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0xaa,0x00,0x04,0x00,0x00] + vpmovqd %xmm29, 1024(%rdx) + +// CHECK: vpmovqd %xmm29, -1024(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0x6a,0x80] + vpmovqd %xmm29, -1024(%rdx) + +// CHECK: vpmovqd %xmm29, -1032(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0xaa,0xf8,0xfb,0xff,0xff] + vpmovqd %xmm29, -1032(%rdx) + +// CHECK: vpmovqd %ymm30, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0x31] + vpmovqd %ymm30, (%rcx) + +// CHECK: vpmovqd %ymm30, (%rcx) {%k2} +// CHECK: encoding: [0x62,0x62,0x7e,0x2a,0x35,0x31] + vpmovqd %ymm30, (%rcx) {%k2} + +// CHECK: vpmovqd %ymm30, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x35,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpmovqd %ymm30, 291(%rax,%r14,8) + +// CHECK: vpmovqd %ymm30, 2032(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0x72,0x7f] + vpmovqd %ymm30, 2032(%rdx) + +// CHECK: vpmovqd %ymm30, 2048(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0xb2,0x00,0x08,0x00,0x00] + vpmovqd %ymm30, 2048(%rdx) + +// CHECK: vpmovqd %ymm30, -2048(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0x72,0x80] + vpmovqd %ymm30, -2048(%rdx) + +// CHECK: vpmovqd %ymm30, -2064(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0xb2,0xf0,0xf7,0xff,0xff] + vpmovqd %ymm30, -2064(%rdx) + +// CHECK: vpmovsqd %xmm21, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x25,0xed] + vpmovsqd %xmm21, %xmm21 + +// CHECK: vpmovsqd %xmm21, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x7e,0x0a,0x25,0xed] + vpmovsqd %xmm21, %xmm21 {%k2} + +// CHECK: vpmovsqd %xmm21, %xmm21 {%k2} {z} +// CHECK: encoding: [0x62,0xa2,0x7e,0x8a,0x25,0xed] + vpmovsqd %xmm21, %xmm21 {%k2} {z} + +// CHECK: vpmovsqd %ymm29, %xmm29 +// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x25,0xed] + vpmovsqd %ymm29, %xmm29 + +// CHECK: vpmovsqd %ymm29, %xmm29 {%k4} +// CHECK: encoding: [0x62,0x02,0x7e,0x2c,0x25,0xed] + vpmovsqd %ymm29, %xmm29 {%k4} + +// CHECK: vpmovsqd %ymm29, %xmm29 {%k4} {z} +// CHECK: encoding: [0x62,0x02,0x7e,0xac,0x25,0xed] + vpmovsqd %ymm29, %xmm29 {%k4} {z} + +// CHECK: vpmovsqd %xmm17, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x09] + vpmovsqd %xmm17, (%rcx) + +// CHECK: vpmovsqd %xmm17, (%rcx) {%k2} +// CHECK: encoding: [0x62,0xe2,0x7e,0x0a,0x25,0x09] + vpmovsqd %xmm17, (%rcx) {%k2} + +// CHECK: vpmovsqd %xmm17, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x25,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpmovsqd %xmm17, 291(%rax,%r14,8) + +// CHECK: vpmovsqd %xmm17, 1016(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x4a,0x7f] + vpmovsqd %xmm17, 1016(%rdx) + +// CHECK: vpmovsqd %xmm17, 1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x8a,0x00,0x04,0x00,0x00] + vpmovsqd %xmm17, 1024(%rdx) + +// CHECK: vpmovsqd %xmm17, -1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x4a,0x80] + vpmovsqd %xmm17, -1024(%rdx) + +// CHECK: vpmovsqd %xmm17, -1032(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x8a,0xf8,0xfb,0xff,0xff] + vpmovsqd %xmm17, -1032(%rdx) + +// CHECK: vpmovsqd %ymm23, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0x39] + vpmovsqd %ymm23, (%rcx) + +// CHECK: vpmovsqd %ymm23, (%rcx) {%k5} +// CHECK: encoding: [0x62,0xe2,0x7e,0x2d,0x25,0x39] + vpmovsqd %ymm23, (%rcx) {%k5} + +// CHECK: vpmovsqd %ymm23, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x25,0xbc,0xf0,0x23,0x01,0x00,0x00] + vpmovsqd %ymm23, 291(%rax,%r14,8) + +// CHECK: vpmovsqd %ymm23, 2032(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0x7a,0x7f] + vpmovsqd %ymm23, 2032(%rdx) + +// CHECK: vpmovsqd %ymm23, 2048(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0xba,0x00,0x08,0x00,0x00] + vpmovsqd %ymm23, 2048(%rdx) + +// CHECK: vpmovsqd %ymm23, -2048(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0x7a,0x80] + vpmovsqd %ymm23, -2048(%rdx) + +// CHECK: vpmovsqd %ymm23, -2064(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0xba,0xf0,0xf7,0xff,0xff] + vpmovsqd %ymm23, -2064(%rdx) + +// CHECK: vpmovusqd %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x15,0xe9] + vpmovusqd %xmm21, %xmm25 + +// CHECK: vpmovusqd %xmm21, %xmm25 {%k5} +// CHECK: encoding: [0x62,0x82,0x7e,0x0d,0x15,0xe9] + vpmovusqd %xmm21, %xmm25 {%k5} + +// CHECK: vpmovusqd %xmm21, %xmm25 {%k5} {z} +// CHECK: encoding: [0x62,0x82,0x7e,0x8d,0x15,0xe9] + vpmovusqd %xmm21, %xmm25 {%k5} {z} + +// CHECK: vpmovusqd %ymm21, %xmm20 +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x15,0xec] + vpmovusqd %ymm21, %xmm20 + +// CHECK: vpmovusqd %ymm21, %xmm20 {%k2} +// CHECK: encoding: [0x62,0xa2,0x7e,0x2a,0x15,0xec] + vpmovusqd %ymm21, %xmm20 {%k2} + +// CHECK: vpmovusqd %ymm21, %xmm20 {%k2} {z} +// CHECK: encoding: [0x62,0xa2,0x7e,0xaa,0x15,0xec] + vpmovusqd %ymm21, %xmm20 {%k2} {z} + +// CHECK: vpmovusqd %xmm18, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x11] + vpmovusqd %xmm18, (%rcx) + +// CHECK: vpmovusqd %xmm18, (%rcx) {%k1} +// CHECK: encoding: [0x62,0xe2,0x7e,0x09,0x15,0x11] + vpmovusqd %xmm18, (%rcx) {%k1} + +// CHECK: vpmovusqd %xmm18, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x15,0x94,0xf0,0x23,0x01,0x00,0x00] + vpmovusqd %xmm18, 291(%rax,%r14,8) + +// CHECK: vpmovusqd %xmm18, 1016(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x52,0x7f] + vpmovusqd %xmm18, 1016(%rdx) + +// CHECK: vpmovusqd %xmm18, 1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x92,0x00,0x04,0x00,0x00] + vpmovusqd %xmm18, 1024(%rdx) + +// CHECK: vpmovusqd %xmm18, -1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x52,0x80] + vpmovusqd %xmm18, -1024(%rdx) + +// CHECK: vpmovusqd %xmm18, -1032(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x92,0xf8,0xfb,0xff,0xff] + vpmovusqd %xmm18, -1032(%rdx) + +// CHECK: vpmovusqd %ymm29, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0x29] + vpmovusqd %ymm29, (%rcx) + +// CHECK: vpmovusqd %ymm29, (%rcx) {%k6} +// CHECK: encoding: [0x62,0x62,0x7e,0x2e,0x15,0x29] + vpmovusqd %ymm29, (%rcx) {%k6} + +// CHECK: vpmovusqd %ymm29, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x15,0xac,0xf0,0x23,0x01,0x00,0x00] + vpmovusqd %ymm29, 291(%rax,%r14,8) + +// CHECK: vpmovusqd %ymm29, 2032(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0x6a,0x7f] + vpmovusqd %ymm29, 2032(%rdx) + +// CHECK: vpmovusqd %ymm29, 2048(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0xaa,0x00,0x08,0x00,0x00] + vpmovusqd %ymm29, 2048(%rdx) + +// CHECK: vpmovusqd %ymm29, -2048(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0x6a,0x80] + vpmovusqd %ymm29, -2048(%rdx) + +// CHECK: vpmovusqd %ymm29, -2064(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0xaa,0xf0,0xf7,0xff,0xff] + vpmovusqd %ymm29, -2064(%rdx) + +// CHECK: vpmovdb %xmm21, %xmm30 +// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x31,0xee] + vpmovdb %xmm21, %xmm30 + +// CHECK: vpmovdb %xmm21, %xmm30 {%k3} +// CHECK: encoding: [0x62,0x82,0x7e,0x0b,0x31,0xee] + vpmovdb %xmm21, %xmm30 {%k3} + +// CHECK: vpmovdb %xmm21, %xmm30 {%k3} {z} +// CHECK: encoding: [0x62,0x82,0x7e,0x8b,0x31,0xee] + vpmovdb %xmm21, %xmm30 {%k3} {z} + +// CHECK: vpmovdb %ymm21, %xmm23 +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x31,0xef] + vpmovdb %ymm21, %xmm23 + +// CHECK: vpmovdb %ymm21, %xmm23 {%k4} +// CHECK: encoding: [0x62,0xa2,0x7e,0x2c,0x31,0xef] + vpmovdb %ymm21, %xmm23 {%k4} + +// CHECK: vpmovdb %ymm21, %xmm23 {%k4} {z} +// CHECK: encoding: [0x62,0xa2,0x7e,0xac,0x31,0xef] + vpmovdb %ymm21, %xmm23 {%k4} {z} + +// CHECK: vpmovdb %xmm29, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0x29] + vpmovdb %xmm29, (%rcx) + +// CHECK: vpmovdb %xmm29, (%rcx) {%k3} +// CHECK: encoding: [0x62,0x62,0x7e,0x0b,0x31,0x29] + vpmovdb %xmm29, (%rcx) {%k3} + +// CHECK: vpmovdb %xmm29, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x31,0xac,0xf0,0x23,0x01,0x00,0x00] + vpmovdb %xmm29, 291(%rax,%r14,8) + +// CHECK: vpmovdb %xmm29, 508(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0x6a,0x7f] + vpmovdb %xmm29, 508(%rdx) + +// CHECK: vpmovdb %xmm29, 512(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0xaa,0x00,0x02,0x00,0x00] + vpmovdb %xmm29, 512(%rdx) + +// CHECK: vpmovdb %xmm29, -512(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0x6a,0x80] + vpmovdb %xmm29, -512(%rdx) + +// CHECK: vpmovdb %xmm29, -516(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0xaa,0xfc,0xfd,0xff,0xff] + vpmovdb %xmm29, -516(%rdx) + +// CHECK: vpmovdb %ymm26, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x11] + vpmovdb %ymm26, (%rcx) + +// CHECK: vpmovdb %ymm26, (%rcx) {%k6} +// CHECK: encoding: [0x62,0x62,0x7e,0x2e,0x31,0x11] + vpmovdb %ymm26, (%rcx) {%k6} + +// CHECK: vpmovdb %ymm26, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x31,0x94,0xf0,0x23,0x01,0x00,0x00] + vpmovdb %ymm26, 291(%rax,%r14,8) + +// CHECK: vpmovdb %ymm26, 1016(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x52,0x7f] + vpmovdb %ymm26, 1016(%rdx) + +// CHECK: vpmovdb %ymm26, 1024(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x92,0x00,0x04,0x00,0x00] + vpmovdb %ymm26, 1024(%rdx) + +// CHECK: vpmovdb %ymm26, -1024(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x52,0x80] + vpmovdb %ymm26, -1024(%rdx) + +// CHECK: vpmovdb %ymm26, -1032(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x92,0xf8,0xfb,0xff,0xff] + vpmovdb %ymm26, -1032(%rdx) + +// CHECK: vpmovsdb %xmm27, %xmm30 +// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x21,0xde] + vpmovsdb %xmm27, %xmm30 + +// CHECK: vpmovsdb %xmm27, %xmm30 {%k1} +// CHECK: encoding: [0x62,0x02,0x7e,0x09,0x21,0xde] + vpmovsdb %xmm27, %xmm30 {%k1} + +// CHECK: vpmovsdb %xmm27, %xmm30 {%k1} {z} +// CHECK: encoding: [0x62,0x02,0x7e,0x89,0x21,0xde] + vpmovsdb %xmm27, %xmm30 {%k1} {z} + +// CHECK: vpmovsdb %ymm27, %xmm26 +// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x21,0xda] + vpmovsdb %ymm27, %xmm26 + +// CHECK: vpmovsdb %ymm27, %xmm26 {%k3} +// CHECK: encoding: [0x62,0x02,0x7e,0x2b,0x21,0xda] + vpmovsdb %ymm27, %xmm26 {%k3} + +// CHECK: vpmovsdb %ymm27, %xmm26 {%k3} {z} +// CHECK: encoding: [0x62,0x02,0x7e,0xab,0x21,0xda] + vpmovsdb %ymm27, %xmm26 {%k3} {z} + +// CHECK: vpmovsdb %xmm30, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0x31] + vpmovsdb %xmm30, (%rcx) + +// CHECK: vpmovsdb %xmm30, (%rcx) {%k3} +// CHECK: encoding: [0x62,0x62,0x7e,0x0b,0x21,0x31] + vpmovsdb %xmm30, (%rcx) {%k3} + +// CHECK: vpmovsdb %xmm30, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x21,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpmovsdb %xmm30, 291(%rax,%r14,8) + +// CHECK: vpmovsdb %xmm30, 508(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0x72,0x7f] + vpmovsdb %xmm30, 508(%rdx) + +// CHECK: vpmovsdb %xmm30, 512(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0xb2,0x00,0x02,0x00,0x00] + vpmovsdb %xmm30, 512(%rdx) + +// CHECK: vpmovsdb %xmm30, -512(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0x72,0x80] + vpmovsdb %xmm30, -512(%rdx) + +// CHECK: vpmovsdb %xmm30, -516(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0xb2,0xfc,0xfd,0xff,0xff] + vpmovsdb %xmm30, -516(%rdx) + +// CHECK: vpmovsdb %ymm25, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x09] + vpmovsdb %ymm25, (%rcx) + +// CHECK: vpmovsdb %ymm25, (%rcx) {%k5} +// CHECK: encoding: [0x62,0x62,0x7e,0x2d,0x21,0x09] + vpmovsdb %ymm25, (%rcx) {%k5} + +// CHECK: vpmovsdb %ymm25, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x21,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpmovsdb %ymm25, 291(%rax,%r14,8) + +// CHECK: vpmovsdb %ymm25, 1016(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x4a,0x7f] + vpmovsdb %ymm25, 1016(%rdx) + +// CHECK: vpmovsdb %ymm25, 1024(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x8a,0x00,0x04,0x00,0x00] + vpmovsdb %ymm25, 1024(%rdx) + +// CHECK: vpmovsdb %ymm25, -1024(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x4a,0x80] + vpmovsdb %ymm25, -1024(%rdx) + +// CHECK: vpmovsdb %ymm25, -1032(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x8a,0xf8,0xfb,0xff,0xff] + vpmovsdb %ymm25, -1032(%rdx) + +// CHECK: vpmovusdb %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x11,0xee] + vpmovusdb %xmm29, %xmm30 + +// CHECK: vpmovusdb %xmm29, %xmm30 {%k7} +// CHECK: encoding: [0x62,0x02,0x7e,0x0f,0x11,0xee] + vpmovusdb %xmm29, %xmm30 {%k7} + +// CHECK: vpmovusdb %xmm29, %xmm30 {%k7} {z} +// CHECK: encoding: [0x62,0x02,0x7e,0x8f,0x11,0xee] + vpmovusdb %xmm29, %xmm30 {%k7} {z} + +// CHECK: vpmovusdb %ymm17, %xmm23 +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x11,0xcf] + vpmovusdb %ymm17, %xmm23 + +// CHECK: vpmovusdb %ymm17, %xmm23 {%k6} +// CHECK: encoding: [0x62,0xa2,0x7e,0x2e,0x11,0xcf] + vpmovusdb %ymm17, %xmm23 {%k6} + +// CHECK: vpmovusdb %ymm17, %xmm23 {%k6} {z} +// CHECK: encoding: [0x62,0xa2,0x7e,0xae,0x11,0xcf] + vpmovusdb %ymm17, %xmm23 {%k6} {z} + +// CHECK: vpmovusdb %xmm26, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x11] + vpmovusdb %xmm26, (%rcx) + +// CHECK: vpmovusdb %xmm26, (%rcx) {%k7} +// CHECK: encoding: [0x62,0x62,0x7e,0x0f,0x11,0x11] + vpmovusdb %xmm26, (%rcx) {%k7} + +// CHECK: vpmovusdb %xmm26, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x11,0x94,0xf0,0x23,0x01,0x00,0x00] + vpmovusdb %xmm26, 291(%rax,%r14,8) + +// CHECK: vpmovusdb %xmm26, 508(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x52,0x7f] + vpmovusdb %xmm26, 508(%rdx) + +// CHECK: vpmovusdb %xmm26, 512(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x92,0x00,0x02,0x00,0x00] + vpmovusdb %xmm26, 512(%rdx) + +// CHECK: vpmovusdb %xmm26, -512(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x52,0x80] + vpmovusdb %xmm26, -512(%rdx) + +// CHECK: vpmovusdb %xmm26, -516(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x92,0xfc,0xfd,0xff,0xff] + vpmovusdb %xmm26, -516(%rdx) + +// CHECK: vpmovusdb %ymm25, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x09] + vpmovusdb %ymm25, (%rcx) + +// CHECK: vpmovusdb %ymm25, (%rcx) {%k6} +// CHECK: encoding: [0x62,0x62,0x7e,0x2e,0x11,0x09] + vpmovusdb %ymm25, (%rcx) {%k6} + +// CHECK: vpmovusdb %ymm25, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x11,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpmovusdb %ymm25, 291(%rax,%r14,8) + +// CHECK: vpmovusdb %ymm25, 1016(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x4a,0x7f] + vpmovusdb %ymm25, 1016(%rdx) + +// CHECK: vpmovusdb %ymm25, 1024(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x8a,0x00,0x04,0x00,0x00] + vpmovusdb %ymm25, 1024(%rdx) + +// CHECK: vpmovusdb %ymm25, -1024(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x4a,0x80] + vpmovusdb %ymm25, -1024(%rdx) + +// CHECK: vpmovusdb %ymm25, -1032(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x8a,0xf8,0xfb,0xff,0xff] + vpmovusdb %ymm25, -1032(%rdx) + +// CHECK: vpmovdw %xmm25, %xmm17 +// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x33,0xc9] + vpmovdw %xmm25, %xmm17 + +// CHECK: vpmovdw %xmm25, %xmm17 {%k5} +// CHECK: encoding: [0x62,0x22,0x7e,0x0d,0x33,0xc9] + vpmovdw %xmm25, %xmm17 {%k5} + +// CHECK: vpmovdw %xmm25, %xmm17 {%k5} {z} +// CHECK: encoding: [0x62,0x22,0x7e,0x8d,0x33,0xc9] + vpmovdw %xmm25, %xmm17 {%k5} {z} + +// CHECK: vpmovdw %ymm19, %xmm25 +// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x33,0xd9] + vpmovdw %ymm19, %xmm25 + +// CHECK: vpmovdw %ymm19, %xmm25 {%k4} +// CHECK: encoding: [0x62,0x82,0x7e,0x2c,0x33,0xd9] + vpmovdw %ymm19, %xmm25 {%k4} + +// CHECK: vpmovdw %ymm19, %xmm25 {%k4} {z} +// CHECK: encoding: [0x62,0x82,0x7e,0xac,0x33,0xd9] + vpmovdw %ymm19, %xmm25 {%k4} {z} + +// CHECK: vpmovdw %xmm21, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0x29] + vpmovdw %xmm21, (%rcx) + +// CHECK: vpmovdw %xmm21, (%rcx) {%k2} +// CHECK: encoding: [0x62,0xe2,0x7e,0x0a,0x33,0x29] + vpmovdw %xmm21, (%rcx) {%k2} + +// CHECK: vpmovdw %xmm21, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x33,0xac,0xf0,0x23,0x01,0x00,0x00] + vpmovdw %xmm21, 291(%rax,%r14,8) + +// CHECK: vpmovdw %xmm21, 1016(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0x6a,0x7f] + vpmovdw %xmm21, 1016(%rdx) + +// CHECK: vpmovdw %xmm21, 1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0xaa,0x00,0x04,0x00,0x00] + vpmovdw %xmm21, 1024(%rdx) + +// CHECK: vpmovdw %xmm21, -1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0x6a,0x80] + vpmovdw %xmm21, -1024(%rdx) + +// CHECK: vpmovdw %xmm21, -1032(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0xaa,0xf8,0xfb,0xff,0xff] + vpmovdw %xmm21, -1032(%rdx) + +// CHECK: vpmovdw %ymm22, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0x31] + vpmovdw %ymm22, (%rcx) + +// CHECK: vpmovdw %ymm22, (%rcx) {%k6} +// CHECK: encoding: [0x62,0xe2,0x7e,0x2e,0x33,0x31] + vpmovdw %ymm22, (%rcx) {%k6} + +// CHECK: vpmovdw %ymm22, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x33,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpmovdw %ymm22, 291(%rax,%r14,8) + +// CHECK: vpmovdw %ymm22, 2032(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0x72,0x7f] + vpmovdw %ymm22, 2032(%rdx) + +// CHECK: vpmovdw %ymm22, 2048(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0xb2,0x00,0x08,0x00,0x00] + vpmovdw %ymm22, 2048(%rdx) + +// CHECK: vpmovdw %ymm22, -2048(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0x72,0x80] + vpmovdw %ymm22, -2048(%rdx) + +// CHECK: vpmovdw %ymm22, -2064(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0xb2,0xf0,0xf7,0xff,0xff] + vpmovdw %ymm22, -2064(%rdx) + +// CHECK: vpmovsdw %xmm18, %xmm18 +// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x23,0xd2] + vpmovsdw %xmm18, %xmm18 + +// CHECK: vpmovsdw %xmm18, %xmm18 {%k6} +// CHECK: encoding: [0x62,0xa2,0x7e,0x0e,0x23,0xd2] + vpmovsdw %xmm18, %xmm18 {%k6} + +// CHECK: vpmovsdw %xmm18, %xmm18 {%k6} {z} +// CHECK: encoding: [0x62,0xa2,0x7e,0x8e,0x23,0xd2] + vpmovsdw %xmm18, %xmm18 {%k6} {z} + +// CHECK: vpmovsdw %ymm18, %xmm20 +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x23,0xd4] + vpmovsdw %ymm18, %xmm20 + +// CHECK: vpmovsdw %ymm18, %xmm20 {%k2} +// CHECK: encoding: [0x62,0xa2,0x7e,0x2a,0x23,0xd4] + vpmovsdw %ymm18, %xmm20 {%k2} + +// CHECK: vpmovsdw %ymm18, %xmm20 {%k2} {z} +// CHECK: encoding: [0x62,0xa2,0x7e,0xaa,0x23,0xd4] + vpmovsdw %ymm18, %xmm20 {%k2} {z} + +// CHECK: vpmovsdw %xmm29, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0x29] + vpmovsdw %xmm29, (%rcx) + +// CHECK: vpmovsdw %xmm29, (%rcx) {%k1} +// CHECK: encoding: [0x62,0x62,0x7e,0x09,0x23,0x29] + vpmovsdw %xmm29, (%rcx) {%k1} + +// CHECK: vpmovsdw %xmm29, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x23,0xac,0xf0,0x23,0x01,0x00,0x00] + vpmovsdw %xmm29, 291(%rax,%r14,8) + +// CHECK: vpmovsdw %xmm29, 1016(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0x6a,0x7f] + vpmovsdw %xmm29, 1016(%rdx) + +// CHECK: vpmovsdw %xmm29, 1024(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0xaa,0x00,0x04,0x00,0x00] + vpmovsdw %xmm29, 1024(%rdx) + +// CHECK: vpmovsdw %xmm29, -1024(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0x6a,0x80] + vpmovsdw %xmm29, -1024(%rdx) + +// CHECK: vpmovsdw %xmm29, -1032(%rdx) +// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0xaa,0xf8,0xfb,0xff,0xff] + vpmovsdw %xmm29, -1032(%rdx) + +// CHECK: vpmovsdw %ymm19, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x19] + vpmovsdw %ymm19, (%rcx) + +// CHECK: vpmovsdw %ymm19, (%rcx) {%k6} +// CHECK: encoding: [0x62,0xe2,0x7e,0x2e,0x23,0x19] + vpmovsdw %ymm19, (%rcx) {%k6} + +// CHECK: vpmovsdw %ymm19, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x23,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpmovsdw %ymm19, 291(%rax,%r14,8) + +// CHECK: vpmovsdw %ymm19, 2032(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x5a,0x7f] + vpmovsdw %ymm19, 2032(%rdx) + +// CHECK: vpmovsdw %ymm19, 2048(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x9a,0x00,0x08,0x00,0x00] + vpmovsdw %ymm19, 2048(%rdx) + +// CHECK: vpmovsdw %ymm19, -2048(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x5a,0x80] + vpmovsdw %ymm19, -2048(%rdx) + +// CHECK: vpmovsdw %ymm19, -2064(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x9a,0xf0,0xf7,0xff,0xff] + vpmovsdw %ymm19, -2064(%rdx) + +// CHECK: vpmovusdw %xmm18, %xmm18 +// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x13,0xd2] + vpmovusdw %xmm18, %xmm18 + +// CHECK: vpmovusdw %xmm18, %xmm18 {%k2} +// CHECK: encoding: [0x62,0xa2,0x7e,0x0a,0x13,0xd2] + vpmovusdw %xmm18, %xmm18 {%k2} + +// CHECK: vpmovusdw %xmm18, %xmm18 {%k2} {z} +// CHECK: encoding: [0x62,0xa2,0x7e,0x8a,0x13,0xd2] + vpmovusdw %xmm18, %xmm18 {%k2} {z} + +// CHECK: vpmovusdw %ymm25, %xmm28 +// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x13,0xcc] + vpmovusdw %ymm25, %xmm28 + +// CHECK: vpmovusdw %ymm25, %xmm28 {%k4} +// CHECK: encoding: [0x62,0x02,0x7e,0x2c,0x13,0xcc] + vpmovusdw %ymm25, %xmm28 {%k4} + +// CHECK: vpmovusdw %ymm25, %xmm28 {%k4} {z} +// CHECK: encoding: [0x62,0x02,0x7e,0xac,0x13,0xcc] + vpmovusdw %ymm25, %xmm28 {%k4} {z} + +// CHECK: vpmovusdw %xmm20, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0x21] + vpmovusdw %xmm20, (%rcx) + +// CHECK: vpmovusdw %xmm20, (%rcx) {%k6} +// CHECK: encoding: [0x62,0xe2,0x7e,0x0e,0x13,0x21] + vpmovusdw %xmm20, (%rcx) {%k6} + +// CHECK: vpmovusdw %xmm20, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x13,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpmovusdw %xmm20, 291(%rax,%r14,8) + +// CHECK: vpmovusdw %xmm20, 1016(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0x62,0x7f] + vpmovusdw %xmm20, 1016(%rdx) + +// CHECK: vpmovusdw %xmm20, 1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0xa2,0x00,0x04,0x00,0x00] + vpmovusdw %xmm20, 1024(%rdx) + +// CHECK: vpmovusdw %xmm20, -1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0x62,0x80] + vpmovusdw %xmm20, -1024(%rdx) + +// CHECK: vpmovusdw %xmm20, -1032(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0xa2,0xf8,0xfb,0xff,0xff] + vpmovusdw %xmm20, -1032(%rdx) + +// CHECK: vpmovusdw %ymm23, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0x39] + vpmovusdw %ymm23, (%rcx) + +// CHECK: vpmovusdw %ymm23, (%rcx) {%k1} +// CHECK: encoding: [0x62,0xe2,0x7e,0x29,0x13,0x39] + vpmovusdw %ymm23, (%rcx) {%k1} + +// CHECK: vpmovusdw %ymm23, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x13,0xbc,0xf0,0x23,0x01,0x00,0x00] + vpmovusdw %ymm23, 291(%rax,%r14,8) + +// CHECK: vpmovusdw %ymm23, 2032(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0x7a,0x7f] + vpmovusdw %ymm23, 2032(%rdx) + +// CHECK: vpmovusdw %ymm23, 2048(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0xba,0x00,0x08,0x00,0x00] + vpmovusdw %ymm23, 2048(%rdx) + +// CHECK: vpmovusdw %ymm23, -2048(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0x7a,0x80] + vpmovusdw %ymm23, -2048(%rdx) + +// CHECK: vpmovusdw %ymm23, -2064(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0xba,0xf0,0xf7,0xff,0xff] + vpmovusdw %ymm23, -2064(%rdx) + // CHECK: vrndscalepd $171, %xmm28, %xmm29 // CHECK: encoding: [0x62,0x03,0xfd,0x08,0x09,0xec,0xab] vrndscalepd $0xab, %xmm28, %xmm29 |

