Diffstat (limited to 'llvm/test/CodeGen/X86/avx512vl-intrinsics.ll')
-rw-r--r--  llvm/test/CodeGen/X86/avx512vl-intrinsics.ll | 1037
1 file changed, 439 insertions(+), 598 deletions(-)
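
Note on the pattern repeated in every hunk below: these tests pass an i8 mask as the last argument, which (assuming the SysV x86-64 calling convention) arrives in the low byte of %edi or %esi. The masked 128/256-bit operations under test consume at most the low 8 bits of %k1, so zero-extending the byte through a scratch register before the kmovw is redundant. A minimal sketch of the substitution, using the IR from the first test in this file:

  ; i8 %x2 arrives in %dil (low byte of %edi)
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)

  ; expected codegen before:        ; expected codegen after:
  movzbl %dil, %eax                 kmovw %edi, %k1
  kmovw  %eax, %k1

Bits 8-15 of %edi are don't-care to these consumers, so each hunk replaces the two-instruction sequence with the single kmovw in the CHECK lines.
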
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index c1f8b8bb161..7d5e8736c16 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -4016,8 +4016,7 @@ declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>,
define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
@@ -4033,8 +4032,7 @@ declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>,
define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
@@ -4050,8 +4048,7 @@ declare <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32>, <4 x float>, i
define <4 x float>@test_int_x86_avx512_mask_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
@@ -4067,8 +4064,7 @@ declare <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32>, <8 x float>, i
define <8 x float>@test_int_x86_avx512_mask_cvt_dq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
@@ -4084,8 +4080,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8
define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
@@ -4101,8 +4096,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double>, <4 x i32>, i8
define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtpd2dq %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtpd2dq %ymm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
@@ -4118,8 +4112,7 @@ declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double>, <4 x float>
define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_256(<4 x double> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
@@ -4135,8 +4128,7 @@ declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double>, <4 x float>, i8
define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
@@ -4152,8 +4144,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double>, <4 x i32>, i
define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
@@ -4169,8 +4160,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double>, <4 x i32>, i
define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtpd2udq %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtpd2udq %ymm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
@@ -4186,8 +4176,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float>, <4 x i32>, i8)
define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtps2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtps2dq %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
@@ -4203,8 +4192,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float>, <8 x i32>, i8)
define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtps2dq %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
@@ -4220,8 +4208,7 @@ declare <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float>, <2 x double
define <2 x double>@test_int_x86_avx512_mask_cvt_ps2pd_128(<4 x float> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtps2pd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
@@ -4237,8 +4224,7 @@ declare <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float>, <4 x double
define <4 x double>@test_int_x86_avx512_mask_cvt_ps2pd_256(<4 x float> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtps2pd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
@@ -4254,8 +4240,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float>, <4 x i32>, i8
define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtps2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtps2udq %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
@@ -4271,8 +4256,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float>, <8 x i32>, i8
define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtps2udq %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtps2udq %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
@@ -4288,8 +4272,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double>, <4 x i32>, i
define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
@@ -4305,8 +4288,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double>, <4 x i32>, i
define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
@@ -4322,8 +4304,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double>, <4 x i32>,
define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
@@ -4339,8 +4320,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double>, <4 x i32>,
define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
@@ -4356,8 +4336,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float>, <4 x i32>, i8
define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
@@ -4373,8 +4352,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float>, <8 x i32>, i8
define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
@@ -4390,8 +4368,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float>, <4 x i32>, i
define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvttps2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvttps2udq %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
@@ -4407,8 +4384,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i
define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvttps2udq %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vcvttps2udq %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
@@ -4424,8 +4400,7 @@ declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>
define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
@@ -4441,8 +4416,7 @@ declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>
define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
@@ -4458,8 +4432,7 @@ declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>,
define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
@@ -4475,8 +4448,7 @@ declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>,
define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
@@ -4545,8 +4517,7 @@ declare <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float>, <8 x float
define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm3 {%k1} {z}
@@ -4569,8 +4540,7 @@ declare <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double>, <4 x dou
define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3]
; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm3 {%k1} {z}
@@ -4593,8 +4563,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32>, <8 x i32>, i32
define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm0
@@ -4612,8 +4581,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64>, <4 x i64>, i32
define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3]
; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm0
@@ -4651,8 +4619,7 @@ declare <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double>, i32, <2
define <2 x double>@test_int_x86_avx512_mask_getmant_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm0
@@ -4672,8 +4639,7 @@ declare <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double>, i32, <4
define <4 x double>@test_int_x86_avx512_mask_getmant_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vgetmantpd $11, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vgetmantpd $11, %ymm0, %ymm0
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
@@ -4689,8 +4655,7 @@ declare <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float>, i32, <4 x
define <4 x float>@test_int_x86_avx512_mask_getmant_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vgetmantps $11, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vgetmantps $11, %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
@@ -4706,8 +4671,7 @@ declare <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float>, i32, <8 x
define <8 x float>@test_int_x86_avx512_mask_getmant_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vgetmantps $11, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vgetmantps $11, %ymm0, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
@@ -4723,8 +4687,7 @@ declare <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double>, <2 x double
define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: ## xmm2 = xmm2[0],k1[1]
; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm3 {%k1} {z}
@@ -4747,8 +4710,7 @@ declare <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double>, <4 x double
define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm2[0],k1[1],ymm2[3],k1[2]
; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm0
@@ -4766,8 +4728,7 @@ declare <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float>, <4 x float>,
define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: ## xmm2 = xmm2[2,1],k1[1,0]
; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm0
@@ -4785,8 +4746,7 @@ declare <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float>, <8 x float>,
define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: ## ymm2 = ymm2[2,1],k1[1,0],ymm2[6,5],k1[5,4]
; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm0
@@ -4804,8 +4764,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32,
define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm0
@@ -4825,8 +4784,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32>, <8 x i32>, i32,
define <8 x i32>@test_int_x86_avx512_mask_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: valignd $22, %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: valignd $22, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
@@ -4842,8 +4800,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64>, <2 x i64>, i32,
define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: valignq $22, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: valignq $22, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0
@@ -4859,8 +4816,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64>, <4 x i64>, i32,
define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: valignq $22, %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: valignq $22, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0
@@ -4876,8 +4832,7 @@ declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4
define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: ## ymm1 = ymm1[0,1,3,2]
; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm2 {%k1} {z}
@@ -4900,8 +4855,7 @@ declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2
define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: ## xmm1 = xmm1[1,0]
; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 {%k1} {z}
@@ -4924,8 +4878,7 @@ declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x
define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: ## ymm1 = ymm1[2,1,1,0,6,5,5,4]
; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 {%k1} {z}
@@ -4948,8 +4901,7 @@ declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x
define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: ## xmm1 = xmm1[2,1,1,0]
; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 {%k1} {z}
@@ -4972,8 +4924,7 @@ declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x
define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
@@ -4993,8 +4944,7 @@ declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x
define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
@@ -5014,8 +4964,7 @@ declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i3
define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0
@@ -5035,8 +4984,7 @@ declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i3
define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0
@@ -5056,8 +5004,7 @@ declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x floa
define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
@@ -5077,8 +5024,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i3
define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
@@ -5099,8 +5045,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4
define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0
@@ -5117,8 +5062,7 @@ declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4
define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0
@@ -5135,8 +5079,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8
define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1}
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0
@@ -5153,8 +5096,7 @@ declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8
define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0
@@ -5171,8 +5113,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2
define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0
@@ -5189,8 +5130,7 @@ declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2
define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0
@@ -5207,8 +5147,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4
define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1}
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0
@@ -5225,8 +5164,7 @@ declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4
define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0
@@ -5243,8 +5181,7 @@ declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8)
define <8 x i32>@test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpbroadcastd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vpbroadcastd %xmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0
@@ -5264,8 +5201,7 @@ declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8)
define <4 x i32>@test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpbroadcastd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpbroadcastd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpbroadcastd %xmm0, %xmm0
@@ -5285,8 +5221,7 @@ declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8)
define <4 x i64>@test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpbroadcastq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vpbroadcastq %xmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
@@ -5306,8 +5241,7 @@ declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8)
define <2 x i64>@test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpbroadcastq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpbroadcastq %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0
@@ -5324,7 +5258,7 @@ define <2 x i64>@test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x
define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) {
; CHECK: test_x86_vcvtph2ps_128
- ; CHECK: vcvtph2ps %xmm0, %xmm0
+ ; CHECK: vcvtph2ps %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 -1)
ret <4 x float> %res
}
@@ -5355,7 +5289,7 @@ define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) {
define <8 x float> @test_x86_vcvtph2ps_256_rrk(<8 x i16> %a0,<8 x float> %a1, i8 %mask) {
; CHECK: test_x86_vcvtph2ps_256_rrk
- ; CHECK: vcvtph2ps %xmm0, %ymm1 {%k1}
+ ; CHECK: vcvtph2ps %xmm0, %ymm1 {%k1}
%res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> %a1, i8 %mask)
ret <8 x float> %res
}
@@ -5393,17 +5327,16 @@ declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>,
define <4 x float>@test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vmovsldup %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovsldup %xmm0, %xmm1 {%k1}
; CHECK-NEXT: ## xmm1 = xmm0[0,0,2,2]
-; CHECK-NEXT: vmovsldup %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vmovsldup %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: ## xmm2 = xmm0[0,0,2,2]
-; CHECK-NEXT: vmovsldup %xmm0, %xmm0
+; CHECK-NEXT: vmovsldup %xmm0, %xmm0
; CHECK-NEXT: ## xmm0 = xmm0[0,0,2,2]
-; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
%res1 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
%res2 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
@@ -5417,17 +5350,16 @@ declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>,
define <8 x float>@test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vmovsldup %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovsldup %ymm0, %ymm1 {%k1}
; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2,4,4,6,6]
-; CHECK-NEXT: vmovsldup %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vmovsldup %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2,4,4,6,6]
-; CHECK-NEXT: vmovsldup %ymm0, %ymm0
+; CHECK-NEXT: vmovsldup %ymm0, %ymm0
; CHECK-NEXT: ## ymm0 = ymm0[0,0,2,2,4,4,6,6]
-; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
%res1 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
%res2 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
@@ -5441,17 +5373,16 @@ declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>,
define <4 x float>@test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vmovshdup %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovshdup %xmm0, %xmm1 {%k1}
; CHECK-NEXT: ## xmm1 = xmm0[1,1,3,3]
-; CHECK-NEXT: vmovshdup %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vmovshdup %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: ## xmm2 = xmm0[1,1,3,3]
-; CHECK-NEXT: vmovshdup %xmm0, %xmm0
+; CHECK-NEXT: vmovshdup %xmm0, %xmm0
; CHECK-NEXT: ## xmm0 = xmm0[1,1,3,3]
-; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
%res1 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
%res2 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
@@ -5465,17 +5396,16 @@ declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>,
define <8 x float>@test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vmovshdup %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovshdup %ymm0, %ymm1 {%k1}
; CHECK-NEXT: ## ymm1 = ymm0[1,1,3,3,5,5,7,7]
-; CHECK-NEXT: vmovshdup %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vmovshdup %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: ## ymm2 = ymm0[1,1,3,3,5,5,7,7]
-; CHECK-NEXT: vmovshdup %ymm0, %ymm0
+; CHECK-NEXT: vmovshdup %ymm0, %ymm0
; CHECK-NEXT: ## ymm0 = ymm0[1,1,3,3,5,5,7,7]
-; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
%res1 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
%res2 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
@@ -5488,8 +5418,7 @@ declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double
define <2 x double>@test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movddup_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovddup %xmm0, %xmm1 {%k1}
; CHECK-NEXT: ## xmm1 = xmm0[0,0]
; CHECK-NEXT: vmovddup %xmm0, %xmm2 {%k1} {z}
@@ -5512,8 +5441,7 @@ declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double
define <4 x double>@test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movddup_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovddup %ymm0, %ymm1 {%k1}
; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2]
; CHECK-NEXT: vmovddup %ymm0, %ymm2 {%k1} {z}
@@ -5714,15 +5642,15 @@ declare <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double>, <2 x double>, i
define <4 x double> @test_x86_vbroadcast_sd_pd_256(<2 x double> %a0, <4 x double> %a1, i8 %mask ) {
; CHECK-LABEL: test_x86_vbroadcast_sd_pd_256:
-; CHECK: kmovw %eax, %k1
-; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vbroadcastsd %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
-; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-
- %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 -1)
- %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> %a1, i8 %mask)
- %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
+; CHECK: kmovw %edi, %k1
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+
+ %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 -1)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> %a1, i8 %mask)
+ %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
%res3 = fadd <4 x double> %res, %res1
%res4 = fadd <4 x double> %res2, %res3
ret <4 x double> %res4
@@ -5731,15 +5659,15 @@ declare <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double>, <4
define <8 x float> @test_x86_vbroadcast_ss_ps_256(<4 x float> %a0, <8 x float> %a1, i8 %mask ) {
; CHECK-LABEL: test_x86_vbroadcast_ss_ps_256:
-; CHECK: kmovw %eax, %k1
+; CHECK: kmovw %edi, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vbroadcastss %xmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
- %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 -1)
- %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> %a1, i8 %mask)
- %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
+ %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 -1)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> %a1, i8 %mask)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
%res3 = fadd <8 x float> %res, %res1
%res4 = fadd <8 x float> %res2, %res3
ret <8 x float> %res4
@@ -5748,15 +5676,15 @@ declare <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float>, <8 x
define <4 x float> @test_x86_vbroadcast_ss_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask ) {
; CHECK-LABEL: test_x86_vbroadcast_ss_ps_128:
-; CHECK: kmovw %eax, %k1
+; CHECK: kmovw %edi, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vbroadcastss %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
- %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
- %res1 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
- %res2 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
+ %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
%res3 = fadd <4 x float> %res, %res1
%res4 = fadd <4 x float> %res2, %res3
ret <4 x float> %res4
@@ -5768,7 +5696,7 @@ declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float>, <8 x f
define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256(<4 x float> %x0, <8 x float> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256:
-; CHECK: kmovw %eax, %k1
+; CHECK: kmovw %edi, %k1
; CHECK: vshuff32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z}
; CHECK: vshuff32x4 $0, %ymm0, %ymm0, %ymm1 {%k1}
; CHECK: vshuff32x4 $0, %ymm0, %ymm0, %ymm0
@@ -5787,7 +5715,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32>, <8 x i32>,
define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x4_256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256:
-; CHECK: kmovw %eax, %k1
+; CHECK: kmovw %edi, %k1
; CHECK: vshufi32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z}
; CHECK: vshufi32x4 $0, %ymm0, %ymm0, %ymm1 {%k1}
; CHECK: vshufi32x4 $0, %ymm0, %ymm0, %ymm0
@@ -5807,8 +5735,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64>, <2 x i64>, <2 x i6
define <2 x i64>@test_int_x86_avx512_mask_psrl_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrl_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
@@ -5828,8 +5755,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64>, <2 x i64>, <4 x i6
define <4 x i64>@test_int_x86_avx512_mask_psrl_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrl_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlq %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsrlq %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
@@ -5849,8 +5775,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64>, i8, <2 x i64>, i8
define <2 x i64>@test_int_x86_avx512_mask_psrl_qi_128(<2 x i64> %x0, i8 %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrl_qi_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsrlq $255, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpsrlq $255, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpsrlq $255, %xmm0, %xmm0
@@ -5870,8 +5795,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64>, i8, <4 x i64>, i8
define <4 x i64>@test_int_x86_avx512_mask_psrl_qi_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrl_qi_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsrlq $255, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpsrlq $255, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpsrlq $255, %ymm0, %ymm0
@@ -5889,8 +5813,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32>, <4 x i32>, <4 x i3
define <4 x i32>@test_int_x86_avx512_mask_psrl_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrl_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm0
@@ -5910,8 +5833,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32>, <4 x i32>, <8 x i3
define <8 x i32>@test_int_x86_avx512_mask_psrl_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrl_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrld %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsrld %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsrld %xmm1, %ymm0, %ymm0
@@ -5931,8 +5853,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32>, i8, <4 x i32>, i8
define <4 x i32>@test_int_x86_avx512_mask_psrl_di_128(<4 x i32> %x0, i8 %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrl_di_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsrld $255, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpsrld $255, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpsrld $255, %xmm0, %xmm0
@@ -5952,8 +5873,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32>, i8, <8 x i32>, i8
define <8 x i32>@test_int_x86_avx512_mask_psrl_di_256(<8 x i32> %x0, i8 %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrl_di_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsrld $255, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpsrld $255, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpsrld $255, %ymm0, %ymm0
@@ -5993,8 +5913,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64>, <2 x i64>, <2 x i64
define <2 x i64>@test_int_x86_avx512_mask_psrlv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv2_di:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
@@ -6014,8 +5933,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64>, <4 x i64>, <4 x i64
define <4 x i64>@test_int_x86_avx512_mask_psrlv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv4_di:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
@@ -6035,8 +5953,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32>, <4 x i32>, <4 x i32
define <4 x i32>@test_int_x86_avx512_mask_psrlv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv4_si:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
@@ -6056,8 +5973,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32>, <8 x i32>, <8 x i32
define <8 x i32>@test_int_x86_avx512_mask_psrlv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv8_si:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
@@ -6077,8 +5993,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32>, <4 x i32>, <4 x i3
define <4 x i32>@test_int_x86_avx512_mask_psra_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm0
@@ -6098,8 +6013,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32>, <4 x i32>, <8 x i3
define <8 x i32>@test_int_x86_avx512_mask_psra_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrad %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsrad %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsrad %xmm1, %ymm0, %ymm0
@@ -6119,8 +6033,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.psra.di.128(<4 x i32>, i8, <4 x i32>, i8
define <4 x i32>@test_int_x86_avx512_mask_psra_di_128(<4 x i32> %x0, i8 %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_di_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsrad $3, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpsrad $3, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpsrad $3, %xmm0, %xmm0
@@ -6140,8 +6053,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.psra.di.256(<8 x i32>, i8, <8 x i32>, i8
define <8 x i32>@test_int_x86_avx512_mask_psra_di_256(<8 x i32> %x0, i8 %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_di_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsrad $3, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpsrad $3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpsrad $3, %ymm0, %ymm0
@@ -6161,8 +6073,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64>, <2 x i64>, <2 x i6
define <2 x i64>@test_int_x86_avx512_mask_psra_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsraq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsraq %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsraq %xmm1, %xmm0, %xmm0
@@ -6182,8 +6093,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64>, <2 x i64>, <4 x i6
define <4 x i64>@test_int_x86_avx512_mask_psra_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsraq %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsraq %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsraq %xmm1, %ymm0, %ymm0
@@ -6203,8 +6113,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64>, i8, <2 x i64>, i8
define <2 x i64>@test_int_x86_avx512_mask_psra_qi_128(<2 x i64> %x0, i8 %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_qi_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsraq $3, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpsraq $3, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpsraq $3, %xmm0, %xmm0
@@ -6224,8 +6133,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64>, i8, <4 x i64>, i8
define <4 x i64>@test_int_x86_avx512_mask_psra_qi_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_qi_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsraq $3, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpsraq $3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpsraq $3, %ymm0, %ymm0
@@ -6246,8 +6154,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32>, <4 x i32>, <4 x i3
define <4 x i32>@test_int_x86_avx512_mask_psll_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm0
@@ -6267,8 +6174,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32>, <4 x i32>, <8 x i3
define <8 x i32>@test_int_x86_avx512_mask_psll_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpslld %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpslld %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpslld %xmm1, %ymm0, %ymm0
@@ -6288,8 +6194,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32>, i8, <4 x i32>, i8
define <4 x i32>@test_int_x86_avx512_mask_psll_di_128(<4 x i32> %x0, i8 %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_di_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpslld $3, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpslld $3, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpslld $3, %xmm0, %xmm0
@@ -6309,8 +6214,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32>, i8, <8 x i32>, i8
define <8 x i32>@test_int_x86_avx512_mask_psll_di_256(<8 x i32> %x0, i8 %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_di_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpslld $3, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpslld $3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpslld $3, %ymm0, %ymm0
@@ -6330,8 +6234,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64>, <2 x i64>, <4 x i6
define <4 x i64>@test_int_x86_avx512_mask_psll_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllq %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsllq %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsllq %xmm1, %ymm0, %ymm0
@@ -6351,8 +6254,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.psll.qi.128(<2 x i64>, i8, <2 x i64>, i8
define <2 x i64>@test_int_x86_avx512_mask_psll_qi_128(<2 x i64> %x0, i8 %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_qi_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsllq $3, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpsllq $3, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpsllq $3, %xmm0, %xmm0
@@ -6372,8 +6274,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.psll.qi.256(<4 x i64>, i8, <4 x i64>, i8
define <4 x i64>@test_int_x86_avx512_mask_psll_qi_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_qi_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsllq $3, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpsllq $3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpsllq $3, %ymm0, %ymm0
@@ -6391,8 +6292,7 @@ define <4 x i64>@test_int_x86_avx512_mask_psll_qi_256(<4 x i64> %x0, i8 %x1, <4
define <8 x float> @test_mask_load_aligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_ps_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovaps (%rdi), %ymm0
; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1}
; CHECK-NEXT: vmovaps (%rdi), %ymm1 {%k1} {z}
@@ -6410,8 +6310,7 @@ declare <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8*, <8 x float>, i8)
define <8 x float> @test_mask_load_unaligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_ps_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovups (%rdi), %ymm0
; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1}
; CHECK-NEXT: vmovups (%rdi), %ymm1 {%k1} {z}
@@ -6429,8 +6328,7 @@ declare <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8*, <8 x float>, i8)
define <4 x double> @test_mask_load_aligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_pd_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovapd (%rdi), %ymm0
; CHECK-NEXT: vmovapd (%rdi), %ymm0 {%k1}
; CHECK-NEXT: vmovapd (%rdi), %ymm1 {%k1} {z}
@@ -6448,8 +6346,7 @@ declare <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8*, <4 x double>, i8)
define <4 x double> @test_mask_load_unaligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_pd_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovupd (%rdi), %ymm0
; CHECK-NEXT: vmovupd (%rdi), %ymm0 {%k1}
; CHECK-NEXT: vmovupd (%rdi), %ymm1 {%k1} {z}
@@ -6467,8 +6364,7 @@ declare <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8*, <4 x double>, i8)
define <4 x float> @test_mask_load_aligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_ps_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovaps (%rdi), %xmm0
; CHECK-NEXT: vmovaps (%rdi), %xmm0 {%k1}
; CHECK-NEXT: vmovaps (%rdi), %xmm1 {%k1} {z}
@@ -6486,8 +6382,7 @@ declare <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8*, <4 x float>, i8)
define <4 x float> @test_mask_load_unaligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_ps_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovups (%rdi), %xmm0
; CHECK-NEXT: vmovups (%rdi), %xmm0 {%k1}
; CHECK-NEXT: vmovups (%rdi), %xmm1 {%k1} {z}
@@ -6505,8 +6400,7 @@ declare <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8*, <4 x float>, i8)
define <2 x double> @test_mask_load_aligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_pd_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovapd (%rdi), %xmm0
; CHECK-NEXT: vmovapd (%rdi), %xmm0 {%k1}
; CHECK-NEXT: vmovapd (%rdi), %xmm1 {%k1} {z}
@@ -6524,8 +6418,7 @@ declare <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8*, <2 x double>, i8)
define <2 x double> @test_mask_load_unaligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_pd_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovupd (%rdi), %xmm0
; CHECK-NEXT: vmovupd (%rdi), %xmm0 {%k1}
; CHECK-NEXT: vmovupd (%rdi), %xmm1 {%k1} {z}
@@ -6545,8 +6438,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32>, <4 x i32>, <4 x i32
define <4 x i32>@test_int_x86_avx512_mask_psrav4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrav4_si:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm0
@@ -6566,8 +6458,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32>, <8 x i32>, <8 x i32
define <8 x i32>@test_int_x86_avx512_mask_psrav8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_si:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
@@ -6587,8 +6478,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64>, <2 x i64>, <2 x i
define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm0
@@ -6608,8 +6498,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64>, <4 x i64>, <4 x i
define <4 x i64>@test_int_x86_avx512_mask_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm0
@@ -6629,8 +6518,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64>, <2 x i64>, <2 x i64
define <2 x i64>@test_int_x86_avx512_mask_psllv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psllv2_di:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
@@ -6650,8 +6538,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64>, <4 x i64>, <4 x i64
define <4 x i64>@test_int_x86_avx512_mask_psllv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psllv4_di:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
@@ -6671,8 +6558,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32>, <4 x i32>, <4 x i32
define <4 x i32>@test_int_x86_avx512_mask_psllv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psllv4_si:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
@@ -6692,8 +6578,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32>, <8 x i32>, <8 x i32
define <8 x i32>@test_int_x86_avx512_mask_psllv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psllv8_si:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
@@ -6713,14 +6598,13 @@ declare <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32>, <4 x i32>, <4 x i
define <4 x i32>@test_int_x86_avx512_mask_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm3 {%k1} {z}
-; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
@@ -6734,14 +6618,13 @@ declare <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32>, <8 x i32>, <8 x i
define <8 x i32>@test_int_x86_avx512_mask_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1}
-; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm3 {%k1} {z}
-; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
@@ -6755,14 +6638,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64>, <2 x i64>, <2 x i
define <2 x i64>@test_int_x86_avx512_mask_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm3 {%k1} {z}
-; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
@@ -6776,14 +6658,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64>, <4 x i64>, <4 x i
define <4 x i64>@test_int_x86_avx512_mask_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1}
-; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm3 {%k1} {z}
-; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
@@ -6796,14 +6677,13 @@ declare <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32>, i8, <4 x i32>, i8)
define <4 x i32>@test_int_x86_avx512_mask_prol_d_128(<4 x i32> %x0, i8 %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prol_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprold $3, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vprold $3, %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vprold $3, %xmm0, %xmm0
-; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vprold $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vprold $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vprold $3, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i8 3, <4 x i32> zeroinitializer, i8 %x3)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 -1)
@@ -6817,14 +6697,13 @@ declare <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32>, i8, <8 x i32>, i8)
define <8 x i32>@test_int_x86_avx512_mask_prol_d_256(<8 x i32> %x0, i8 %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prol_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprold $3, %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vprold $3, %ymm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vprold $3, %ymm0, %ymm0
-; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vprold $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vprold $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vprold $3, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i8 3, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 -1)
@@ -6838,14 +6717,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64>, i8, <2 x i64>, i8)
define <2 x i64>@test_int_x86_avx512_mask_prol_q_128(<2 x i64> %x0, i8 %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prol_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprolq $3, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vprolq $3, %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vprolq $3, %xmm0, %xmm0
-; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vprolq $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vprolq $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vprolq $3, %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i8 3, <2 x i64> zeroinitializer, i8 %x3)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 -1)
@@ -6859,14 +6737,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64>, i8, <4 x i64>, i8)
define <4 x i64>@test_int_x86_avx512_mask_prol_q_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prol_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprolq $3, %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vprolq $3, %ymm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vprolq $3, %ymm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vprolq $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vprolq $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vprolq $3, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i8 3, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 -1)
@@ -6880,8 +6757,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8*, <4 x i32>, i8)
define <4 x i32> @test_mask_load_aligned_d_128(<4 x i32> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0
; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 {%k1}
; CHECK-NEXT: vmovdqa32 (%rdi), %xmm1 {%k1} {z}
@@ -6899,8 +6775,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8*, <8 x i32>, i8)
define <8 x i32> @test_mask_load_aligned_d_256(<8 x i32> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0
; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 {%k1}
; CHECK-NEXT: vmovdqa32 (%rdi), %ymm1 {%k1} {z}
@@ -6918,8 +6793,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8*, <2 x i64>, i8)
define <2 x i64> @test_mask_load_aligned_q_128(<2 x i64> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0
; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 {%k1}
; CHECK-NEXT: vmovdqa64 (%rdi), %xmm1 {%k1} {z}
@@ -6937,8 +6811,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8*, <4 x i64>, i8)
define <4 x i64> @test_mask_load_aligned_q_256(<4 x i64> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0
; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 {%k1}
; CHECK-NEXT: vmovdqa64 (%rdi), %ymm1 {%k1} {z}
@@ -6956,11 +6829,10 @@ declare <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32>, <4 x i32>, <4 x i
define <4 x i32>@test_int_x86_avx512_mask_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prolv_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm3 {%k1} {z}
-; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
@@ -6977,14 +6849,13 @@ declare <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32>, <8 x i32>, <8 x i
define <8 x i32>@test_int_x86_avx512_mask_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prolv_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1}
-; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm3 {%k1} {z}
-; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
@@ -6998,14 +6869,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64>, <2 x i64>, <2 x i
define <2 x i64>@test_int_x86_avx512_mask_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prolv_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm3 {%k1} {z}
-; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
@@ -7019,14 +6889,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64>, <4 x i64>, <4 x i
define <4 x i64>@test_int_x86_avx512_mask_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prolv_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1}
-; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm3 {%k1} {z}
-; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
@@ -7040,14 +6909,13 @@ declare <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32>, i8, <4 x i32>, i8)
define <4 x i32>@test_int_x86_avx512_mask_pror_d_128(<4 x i32> %x0, i8 %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pror_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprord $3, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vprord $3, %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vprord $3, %xmm0, %xmm0
-; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vprord $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vprord $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vprord $3, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i8 3, <4 x i32> zeroinitializer, i8 %x3)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 -1)
@@ -7061,14 +6929,13 @@ declare <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32>, i8, <8 x i32>, i8)
define <8 x i32>@test_int_x86_avx512_mask_pror_d_256(<8 x i32> %x0, i8 %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pror_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprord $3, %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vprord $3, %ymm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vprord $3, %ymm0, %ymm0
-; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vprord $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vprord $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vprord $3, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i8 3, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 -1)
@@ -7082,14 +6949,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64>, i8, <2 x i64>, i8)
define <2 x i64>@test_int_x86_avx512_mask_pror_q_128(<2 x i64> %x0, i8 %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pror_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprorq $3, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vprorq $3, %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vprorq $3, %xmm0, %xmm0
-; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vprorq $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vprorq $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vprorq $3, %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i8 3, <2 x i64> zeroinitializer, i8 %x3)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 -1)
@@ -7103,14 +6969,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64>, i8, <4 x i64>, i8)
define <4 x i64>@test_int_x86_avx512_mask_pror_q_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pror_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprorq $3, %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vprorq $3, %ymm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vprorq $3, %ymm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vprorq $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vprorq $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vprorq $3, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i8 3, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 -1)
@@ -7124,14 +6989,13 @@ declare <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8>, <4 x i32>, i8)
define <4 x i32>@test_int_x86_avx512_mask_pmovzxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxbd %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vpmovzxbd %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxbd %xmm0, %xmm0
-; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxbd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovzxbd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxbd %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
@@ -7145,14 +7009,13 @@ declare <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8>, <8 x i32>, i8)
define <8 x i32>@test_int_x86_avx512_mask_pmovzxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxbd %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vpmovzxbd %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxbd %xmm0, %ymm0
-; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxbd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovzxbd %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxbd %xmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
@@ -7166,14 +7029,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8>, <2 x i64>, i8)
define <2 x i64>@test_int_x86_avx512_mask_pmovzxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxbq %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vpmovzxbq %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxbq %xmm0, %xmm0
-; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxbq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovzxbq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxbq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
@@ -7187,14 +7049,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8>, <4 x i64>, i8)
define <4 x i64>@test_int_x86_avx512_mask_pmovzxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxbq %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vpmovzxbq %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxbq %xmm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxbq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovzxbq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxbq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
@@ -7208,14 +7069,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32>, <2 x i64>, i8)
define <2 x i64>@test_int_x86_avx512_mask_pmovzxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxdq %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vpmovzxdq %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxdq %xmm0, %xmm0
-; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxdq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovzxdq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxdq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1)
@@ -7229,14 +7089,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32>, <4 x i64>, i8)
define <4 x i64>@test_int_x86_avx512_mask_pmovzxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxdq %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vpmovzxdq %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxdq %xmm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxdq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovzxdq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxdq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1)
@@ -7250,14 +7109,13 @@ declare <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16>, <4 x i32>, i8)
define <4 x i32>@test_int_x86_avx512_mask_pmovzxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxwd %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vpmovzxwd %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxwd %xmm0, %xmm0
-; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxwd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovzxwd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxwd %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
@@ -7271,14 +7129,13 @@ declare <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16>, <8 x i32>, i8)
define <8 x i32>@test_int_x86_avx512_mask_pmovzxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxwd %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vpmovzxwd %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxwd %xmm0, %ymm0
-; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxwd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovzxwd %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxwd %xmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
@@ -7292,14 +7149,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16>, <2 x i64>, i8)
define <2 x i64>@test_int_x86_avx512_mask_pmovzxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxwq %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vpmovzxwq %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxwq %xmm0, %xmm0
-; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxwq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovzxwq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxwq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
@@ -7313,14 +7169,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16>, <4 x i64>, i8)
define <4 x i64>@test_int_x86_avx512_mask_pmovzxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxwq %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vpmovzxwq %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxwq %xmm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxwq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovzxwq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxwq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
@@ -7334,14 +7189,13 @@ declare <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8>, <4 x i32>, i8)
define <4 x i32>@test_int_x86_avx512_mask_pmovsxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovsxbd %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vpmovsxbd %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpmovsxbd %xmm0, %xmm0
-; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxbd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsxbd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxbd %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
@@ -7355,14 +7209,13 @@ declare <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8>, <8 x i32>, i8)
define <8 x i32>@test_int_x86_avx512_mask_pmovsxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovsxbd %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vpmovsxbd %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpmovsxbd %xmm0, %ymm0
-; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxbd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovsxbd %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxbd %xmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
@@ -7376,14 +7229,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8>, <2 x i64>, i8)
define <2 x i64>@test_int_x86_avx512_mask_pmovsxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovsxbq %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vpmovsxbq %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpmovsxbq %xmm0, %xmm0
-; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxbq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsxbq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxbq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
@@ -7397,14 +7249,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8>, <4 x i64>, i8)
define <4 x i64>@test_int_x86_avx512_mask_pmovsxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovsxbq %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vpmovsxbq %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpmovsxbq %xmm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxbq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovsxbq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxbq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
@@ -7418,14 +7269,13 @@ declare <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16>, <4 x i32>, i8)
define <4 x i32>@test_int_x86_avx512_mask_pmovsxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vpmovsxwd %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
-; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsxwd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
@@ -7439,14 +7289,13 @@ declare <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16>, <8 x i32>, i8)
define <8 x i32>@test_int_x86_avx512_mask_pmovsxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovsxwd %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vpmovsxwd %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0
-; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxwd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovsxwd %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
@@ -7460,14 +7309,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16>, <2 x i64>, i8)
define <2 x i64>@test_int_x86_avx512_mask_pmovsxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovsxwq %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vpmovsxwq %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0
-; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxwq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsxwq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
@@ -7481,14 +7329,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16>, <4 x i64>, i8)
define <4 x i64>@test_int_x86_avx512_mask_pmovsxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovsxwq %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vpmovsxwq %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpmovsxwq %xmm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxwq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovsxwq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxwq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
@@ -7502,15 +7349,14 @@ declare <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double>, i8, <4 x do
define <4 x double>@test_int_x86_avx512_mask_perm_df_256(<4 x double> %x0, i8 %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_perm_df_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermpd $3, %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vpermpd $3, %ymm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpermpd $3, %ymm0, %ymm0
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpermpd $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vpermpd $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpermpd $3, %ymm0, %ymm0
; CHECK-NEXT: ## ymm0 = ymm0[3,0,0,0]
-; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i8 3, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i8 3, <4 x double> zeroinitializer, i8 %x3)
%res2 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i8 3, <4 x double> %x2, i8 -1)
@@ -7524,15 +7370,14 @@ declare <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64>, i8, <4 x i64>, i8
define <4 x i64>@test_int_x86_avx512_mask_perm_di_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_perm_di_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermq $3, %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vpermq $3, %ymm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpermq $3, %ymm0, %ymm0
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpermq $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vpermq $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpermq $3, %ymm0, %ymm0
; CHECK-NEXT: ## ymm0 = ymm0[3,0,0,0]
-; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i8 3, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 -1)
@@ -7545,14 +7390,13 @@ declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64
define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermpd %ymm1, %ymm0, %ymm2 {%k1}
-; CHECK-NEXT: vpermpd %ymm1, %ymm0, %ymm3 {%k1} {z}
-; CHECK-NEXT: vpermpd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermpd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpermpd %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpermpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
%res2 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
@@ -7566,14 +7410,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4
define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_di_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermq %ymm1, %ymm0, %ymm2 {%k1}
-; CHECK-NEXT: vpermq %ymm1, %ymm0, %ymm3 {%k1} {z}
-; CHECK-NEXT: vpermq %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermq %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpermq %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpermq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
@@ -7589,14 +7432,13 @@ declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>,
define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermps %ymm1, %ymm0, %ymm2 {%k1}
-; CHECK-NEXT: vpermps %ymm1, %ymm0, %ymm3 {%k1} {z}
-; CHECK-NEXT: vpermps %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermps %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpermps %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpermps %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
%res2 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
@@ -7610,14 +7452,13 @@ declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8
define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermd %ymm1, %ymm0, %ymm2 {%k1}
-; CHECK-NEXT: vpermd %ymm1, %ymm0, %ymm3 {%k1} {z}
-; CHECK-NEXT: vpermd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpermd %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpermd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)