diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/avx512vl-intrinsics.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics.ll | 1037 |
1 files changed, 439 insertions, 598 deletions
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index c1f8b8bb161..7d5e8736c16 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -4016,8 +4016,7 @@ declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 @@ -4033,8 +4032,7 @@ declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0 ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 @@ -4050,8 +4048,7 @@ declare <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32>, <4 x float>, i define <4 x float>@test_int_x86_avx512_mask_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 @@ -4067,8 +4064,7 @@ declare <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32>, <8 x float>, i define <8 x float>@test_int_x86_avx512_mask_cvt_dq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm1 {%k1} ; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 @@ -4084,8 +4080,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 @@ -4101,8 +4096,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double>, <4 x i32>, i8 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtpd2dq %ymm0, %xmm1 {%k1} ; CHECK-NEXT: vcvtpd2dq %ymm0, %xmm0 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 @@ -4118,8 +4112,7 @@ declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double>, <4 x float> define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_256(<4 x double> %x0, <4 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; 
CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm1 {%k1} ; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 @@ -4135,8 +4128,7 @@ declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double>, <4 x float>, i8 define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 @@ -4152,8 +4144,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double>, <4 x i32>, i define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm0 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 @@ -4169,8 +4160,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double>, <4 x i32>, i define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtpd2udq %ymm0, %xmm1 {%k1} ; CHECK-NEXT: vcvtpd2udq %ymm0, %xmm0 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 @@ -4186,8 +4176,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float>, <4 x i32>, i8) define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtps2dq %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vcvtps2dq %xmm0, %xmm0 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 @@ -4203,8 +4192,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float>, <8 x i32>, i8) define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtps2dq %ymm0, %ymm1 {%k1} ; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 @@ -4220,8 +4208,7 @@ declare <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float>, <2 x double define <2 x double>@test_int_x86_avx512_mask_cvt_ps2pd_128(<4 x float> %x0, <2 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtps2pd %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 @@ -4237,8 +4224,7 @@ declare <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float>, <4 x double define <4 x double>@test_int_x86_avx512_mask_cvt_ps2pd_256(<4 x float> %x0, <4 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtps2pd %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vcvtps2pd %xmm0, 
%ymm0 ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 @@ -4254,8 +4240,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float>, <4 x i32>, i8 define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtps2udq %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vcvtps2udq %xmm0, %xmm0 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 @@ -4271,8 +4256,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float>, <8 x i32>, i8 define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtps2udq %ymm0, %ymm1 {%k1} ; CHECK-NEXT: vcvtps2udq %ymm0, %ymm0 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 @@ -4288,8 +4272,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double>, <4 x i32>, i define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 @@ -4305,8 +4288,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double>, <4 x i32>, i define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1} ; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 @@ -4322,8 +4304,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double>, <4 x i32>, define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm0 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 @@ -4339,8 +4320,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double>, <4 x i32>, define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm1 {%k1} ; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm0 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 @@ -4356,8 +4336,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float>, <4 x i32>, i8 define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 @@ -4373,8 +4352,7 @@ declare <8 x i32> 
@llvm.x86.avx512.mask.cvttps2dq.256(<8 x float>, <8 x i32>, i8 define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1} ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 @@ -4390,8 +4368,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float>, <4 x i32>, i define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvttps2udq %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vcvttps2udq %xmm0, %xmm0 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 @@ -4407,8 +4384,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvttps2udq %ymm0, %ymm1 {%k1} ; CHECK-NEXT: vcvttps2udq %ymm0, %ymm0 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 @@ -4424,8 +4400,7 @@ declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double> define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm0 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 @@ -4441,8 +4416,7 @@ declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double> define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0 ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 @@ -4458,8 +4432,7 @@ declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 @@ -4475,8 +4448,7 @@ declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm1 {%k1} ; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 @@ -4545,8 +4517,7 @@ declare <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float>, <8 x float define <8 x 
float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm3 {%k1} {z} @@ -4569,8 +4540,7 @@ declare <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double>, <4 x dou define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3] ; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm3 {%k1} {z} @@ -4593,8 +4563,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32>, <8 x i32>, i32 define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm0 @@ -4612,8 +4581,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64>, <4 x i64>, i32 define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3] ; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm0 @@ -4651,8 +4619,7 @@ declare <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double>, i32, <2 define <2 x double>@test_int_x86_avx512_mask_getmant_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm2 {%k1} {z} ; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm0 @@ -4672,8 +4639,7 @@ declare <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double>, i32, <4 define <4 x double>@test_int_x86_avx512_mask_getmant_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vgetmantpd $11, %ymm0, %ymm1 {%k1} ; CHECK-NEXT: vgetmantpd $11, %ymm0, %ymm0 ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 @@ -4689,8 +4655,7 @@ declare <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float>, i32, <4 x define <4 x float>@test_int_x86_avx512_mask_getmant_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vgetmantps $11, %xmm0, %xmm1 {%k1} ; CHECK-NEXT: 
vgetmantps $11, %xmm0, %xmm0 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 @@ -4706,8 +4671,7 @@ declare <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float>, i32, <8 x define <8 x float>@test_int_x86_avx512_mask_getmant_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vgetmantps $11, %ymm0, %ymm1 {%k1} ; CHECK-NEXT: vgetmantps $11, %ymm0, %ymm0 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 @@ -4723,8 +4687,7 @@ declare <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double>, <2 x double define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: ## xmm2 = xmm2[0],k1[1] ; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm3 {%k1} {z} @@ -4747,8 +4710,7 @@ declare <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double>, <4 x double define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm2 {%k1} ; CHECK-NEXT: ## ymm2 = ymm2[0],k1[1],ymm2[3],k1[2] ; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm0 @@ -4766,8 +4728,7 @@ declare <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float>, <4 x float>, define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: ## xmm2 = xmm2[2,1],k1[1,0] ; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm0 @@ -4785,8 +4746,7 @@ declare <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float>, <8 x float>, define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} ; CHECK-NEXT: ## ymm2 = ymm2[2,1],k1[1,0],ymm2[6,5],k1[5,4] ; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm0 @@ -4804,8 +4764,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32, define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm3 {%k1} {z} ; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm0 @@ -4825,8 +4784,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32>, <8 x i32>, i32, define <8 x i32>@test_int_x86_avx512_mask_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_256: ; CHECK: ## BB#0: -; 
CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: valignd $22, %ymm1, %ymm0, %ymm2 {%k1} ; CHECK-NEXT: valignd $22, %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 @@ -4842,8 +4800,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64>, <2 x i64>, i32, define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: valignq $22, %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: valignq $22, %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 @@ -4859,8 +4816,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64>, <4 x i64>, i32, define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: valignq $22, %ymm1, %ymm0, %ymm2 {%k1} ; CHECK-NEXT: valignq $22, %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 @@ -4876,8 +4832,7 @@ declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm1 {%k1} ; CHECK-NEXT: ## ymm1 = ymm1[0,1,3,2] ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm2 {%k1} {z} @@ -4900,8 +4855,7 @@ declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} ; CHECK-NEXT: ## xmm1 = xmm1[1,0] ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 {%k1} {z} @@ -4924,8 +4878,7 @@ declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} ; CHECK-NEXT: ## ymm1 = ymm1[2,1,1,0,6,5,5,4] ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 {%k1} {z} @@ -4948,8 +4901,7 @@ declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} ; CHECK-NEXT: ## xmm1 = xmm1[2,1,1,0] ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 {%k1} {z} @@ -4972,8 +4924,7 @@ declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) { 
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1} ; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 {%k1} {z} ; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 @@ -4993,8 +4944,7 @@ declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 {%k1} {z} ; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 @@ -5014,8 +4964,7 @@ declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i3 define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1} ; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm3 {%k1} {z} ; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0 @@ -5035,8 +4984,7 @@ declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i3 define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm3 {%k1} {z} ; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0 @@ -5056,8 +5004,7 @@ declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x floa define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 @@ -5077,8 +5024,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i3 define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 @@ -5099,8 +5045,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 ; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} ; 
CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 @@ -5117,8 +5062,7 @@ declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 ; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} {z} ; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 @@ -5135,8 +5079,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 ; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} ; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 @@ -5153,8 +5096,7 @@ declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 ; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} {z} ; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 @@ -5171,8 +5113,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 ; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} ; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 @@ -5189,8 +5130,7 @@ declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 ; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} {z} ; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 @@ -5207,8 +5147,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 ; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} ; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 @@ -5225,8 +5164,7 @@ declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: 
kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 ; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} {z} ; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 @@ -5243,8 +5181,7 @@ declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8) define <8 x i32>@test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask) { ; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpbroadcastd %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vpbroadcastd %xmm0, %ymm2 {%k1} {z} ; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0 @@ -5264,8 +5201,7 @@ declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8) define <4 x i32>@test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) { ; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpbroadcastd %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vpbroadcastd %xmm0, %xmm2 {%k1} {z} ; CHECK-NEXT: vpbroadcastd %xmm0, %xmm0 @@ -5285,8 +5221,7 @@ declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8) define <4 x i64>@test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) { ; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpbroadcastq %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vpbroadcastq %xmm0, %ymm2 {%k1} {z} ; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0 @@ -5306,8 +5241,7 @@ declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8) define <2 x i64>@test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) { ; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpbroadcastq %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vpbroadcastq %xmm0, %xmm2 {%k1} {z} ; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0 @@ -5324,7 +5258,7 @@ define <2 x i64>@test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) { ; CHECK: test_x86_vcvtph2ps_128 - ; CHECK: vcvtph2ps %xmm0, %xmm0 + ; CHECK: vcvtph2ps %xmm0, %xmm0 %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 -1) ret <4 x float> %res } @@ -5355,7 +5289,7 @@ define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) { define <8 x float> @test_x86_vcvtph2ps_256_rrk(<8 x i16> %a0,<8 x float> %a1, i8 %mask) { ; CHECK: test_x86_vcvtph2ps_256_rrk - ; CHECK: vcvtph2ps %xmm0, %ymm1 {%k1} + ; CHECK: vcvtph2ps %xmm0, %ymm1 {%k1} %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> %a1, i8 %mask) ret <8 x float> %res } @@ -5393,17 +5327,16 @@ declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>, define <4 x float>@test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vmovsldup %xmm0, %xmm1 {%k1} +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovsldup %xmm0, %xmm1 {%k1} ; CHECK-NEXT: ## xmm1 = xmm0[0,0,2,2] -; CHECK-NEXT: vmovsldup %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vmovsldup 
%xmm0, %xmm2 {%k1} {z} ; CHECK-NEXT: ## xmm2 = xmm0[0,0,2,2] -; CHECK-NEXT: vmovsldup %xmm0, %xmm0 +; CHECK-NEXT: vmovsldup %xmm0, %xmm0 ; CHECK-NEXT: ## xmm0 = xmm0[0,0,2,2] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 -; CHECK-NEXT: retq +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2) %res1 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1) %res2 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2) @@ -5417,17 +5350,16 @@ declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>, define <8 x float>@test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vmovsldup %ymm0, %ymm1 {%k1} +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovsldup %ymm0, %ymm1 {%k1} ; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2,4,4,6,6] -; CHECK-NEXT: vmovsldup %ymm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vmovsldup %ymm0, %ymm2 {%k1} {z} ; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2,4,4,6,6] -; CHECK-NEXT: vmovsldup %ymm0, %ymm0 +; CHECK-NEXT: vmovsldup %ymm0, %ymm0 ; CHECK-NEXT: ## ymm0 = ymm0[0,0,2,2,4,4,6,6] -; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 -; CHECK-NEXT: retq +; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; CHECK-NEXT: retq %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2) %res1 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1) %res2 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2) @@ -5441,17 +5373,16 @@ declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>, define <4 x float>@test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vmovshdup %xmm0, %xmm1 {%k1} +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovshdup %xmm0, %xmm1 {%k1} ; CHECK-NEXT: ## xmm1 = xmm0[1,1,3,3] -; CHECK-NEXT: vmovshdup %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vmovshdup %xmm0, %xmm2 {%k1} {z} ; CHECK-NEXT: ## xmm2 = xmm0[1,1,3,3] -; CHECK-NEXT: vmovshdup %xmm0, %xmm0 +; CHECK-NEXT: vmovshdup %xmm0, %xmm0 ; CHECK-NEXT: ## xmm0 = xmm0[1,1,3,3] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 -; CHECK-NEXT: retq +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2) %res1 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1) %res2 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2) @@ -5465,17 +5396,16 @@ declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>, define <8 x float>@test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_256: ; CHECK: ## BB#0: -; 
CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vmovshdup %ymm0, %ymm1 {%k1} +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovshdup %ymm0, %ymm1 {%k1} ; CHECK-NEXT: ## ymm1 = ymm0[1,1,3,3,5,5,7,7] -; CHECK-NEXT: vmovshdup %ymm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vmovshdup %ymm0, %ymm2 {%k1} {z} ; CHECK-NEXT: ## ymm2 = ymm0[1,1,3,3,5,5,7,7] -; CHECK-NEXT: vmovshdup %ymm0, %ymm0 +; CHECK-NEXT: vmovshdup %ymm0, %ymm0 ; CHECK-NEXT: ## ymm0 = ymm0[1,1,3,3,5,5,7,7] -; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 -; CHECK-NEXT: retq +; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; CHECK-NEXT: retq %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2) %res1 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1) %res2 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2) @@ -5488,8 +5418,7 @@ declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double define <2 x double>@test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_movddup_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovddup %xmm0, %xmm1 {%k1} ; CHECK-NEXT: ## xmm1 = xmm0[0,0] ; CHECK-NEXT: vmovddup %xmm0, %xmm2 {%k1} {z} @@ -5512,8 +5441,7 @@ declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double define <4 x double>@test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_movddup_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovddup %ymm0, %ymm1 {%k1} ; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2] ; CHECK-NEXT: vmovddup %ymm0, %ymm2 {%k1} {z} @@ -5714,15 +5642,15 @@ declare <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double>, <2 x double>, i define <4 x double> @test_x86_vbroadcast_sd_pd_256(<2 x double> %a0, <4 x double> %a1, i8 %mask ) { ; CHECK-LABEL: test_x86_vbroadcast_sd_pd_256: -; CHECK: kmovw %eax, %k1 -; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1} -; CHECK-NEXT: vbroadcastsd %xmm0, %ymm2 {%k1} {z} -; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 -; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 - - %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 -1) - %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> %a1, i8 %mask) - %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 %mask) +; CHECK: kmovw %edi, %k1 +; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1} +; CHECK-NEXT: vbroadcastsd %xmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 +; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 + + %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 -1) + %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> %a1, i8 %mask) + %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 %mask) %res3 = fadd <4 x double> %res, %res1 %res4 = fadd <4 x double> %res2, %res3 ret <4 x double> %res4 @@ -5731,15 +5659,15 @@ declare <4 x 
double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double>, <4 define <8 x float> @test_x86_vbroadcast_ss_ps_256(<4 x float> %a0, <8 x float> %a1, i8 %mask ) { ; CHECK-LABEL: test_x86_vbroadcast_ss_ps_256: -; CHECK: kmovw %eax, %k1 +; CHECK: kmovw %edi, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vbroadcastss %xmm0, %ymm2 {%k1} {z} ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 - %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 -1) - %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> %a1, i8 %mask) - %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 %mask) + %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 -1) + %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> %a1, i8 %mask) + %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 %mask) %res3 = fadd <8 x float> %res, %res1 %res4 = fadd <8 x float> %res2, %res3 ret <8 x float> %res4 @@ -5748,15 +5676,15 @@ declare <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float>, <8 x define <4 x float> @test_x86_vbroadcast_ss_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask ) { ; CHECK-LABEL: test_x86_vbroadcast_ss_ps_128: -; CHECK: kmovw %eax, %k1 +; CHECK: kmovw %edi, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vbroadcastss %xmm0, %xmm2 {%k1} {z} ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 - %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1) - %res1 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask) - %res2 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask) + %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1) + %res1 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask) + %res2 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask) %res3 = fadd <4 x float> %res, %res1 %res4 = fadd <4 x float> %res2, %res3 ret <4 x float> %res4 @@ -5768,7 +5696,7 @@ declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float>, <8 x f define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256(<4 x float> %x0, <8 x float> %x2, i8 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256: -; CHECK: kmovw %eax, %k1 +; CHECK: kmovw %edi, %k1 ; CHECK: vshuff32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ; CHECK: vshuff32x4 $0, %ymm0, %ymm0, %ymm1 {%k1} ; CHECK: vshuff32x4 $0, %ymm0, %ymm0, %ymm0 @@ -5787,7 +5715,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32>, <8 x i32>, define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x4_256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256: -; CHECK: kmovw %eax, %k1 +; CHECK: kmovw %edi, %k1 ; CHECK: vshufi32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ; CHECK: vshufi32x4 $0, %ymm0, %ymm0, %ymm1 {%k1} ; CHECK: vshufi32x4 $0, %ymm0, %ymm0, %ymm0 @@ -5807,8 +5735,7 @@ declare <2 x i64> 
@llvm.x86.avx512.mask.psrl.q.128(<2 x i64>, <2 x i64>, <2 x i6 define <2 x i64>@test_int_x86_avx512_mask_psrl_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm3 {%k1} {z} ; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 @@ -5828,8 +5755,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64>, <2 x i64>, <4 x i6 define <4 x i64>@test_int_x86_avx512_mask_psrl_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpsrlq %xmm1, %ymm0, %ymm2 {%k1} ; CHECK-NEXT: vpsrlq %xmm1, %ymm0, %ymm3 {%k1} {z} ; CHECK-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 @@ -5849,8 +5775,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64>, i8, <2 x i64>, i8 define <2 x i64>@test_int_x86_avx512_mask_psrl_qi_128(<2 x i64> %x0, i8 %x1, <2 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_qi_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %sil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %esi, %k1 ; CHECK-NEXT: vpsrlq $255, %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vpsrlq $255, %xmm0, %xmm2 {%k1} {z} ; CHECK-NEXT: vpsrlq $255, %xmm0, %xmm0 @@ -5870,8 +5795,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64>, i8, <4 x i64>, i8 define <4 x i64>@test_int_x86_avx512_mask_psrl_qi_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_qi_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %sil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %esi, %k1 ; CHECK-NEXT: vpsrlq $255, %ymm0, %ymm1 {%k1} ; CHECK-NEXT: vpsrlq $255, %ymm0, %ymm2 {%k1} {z} ; CHECK-NEXT: vpsrlq $255, %ymm0, %ymm0 @@ -5889,8 +5813,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32>, <4 x i32>, <4 x i3 define <4 x i32>@test_int_x86_avx512_mask_psrl_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm3 {%k1} {z} ; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm0 @@ -5910,8 +5833,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32>, <4 x i32>, <8 x i3 define <8 x i32>@test_int_x86_avx512_mask_psrl_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpsrld %xmm1, %ymm0, %ymm2 {%k1} ; CHECK-NEXT: vpsrld %xmm1, %ymm0, %ymm3 {%k1} {z} ; CHECK-NEXT: vpsrld %xmm1, %ymm0, %ymm0 @@ -5931,8 +5853,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32>, i8, <4 x i32>, i8 define <4 x i32>@test_int_x86_avx512_mask_psrl_di_128(<4 x i32> %x0, i8 %x1, <4 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_di_128: ; CHECK: ## BB#0: -; CHECK-NEXT: movzbl %sil, %eax -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovw %esi, %k1 ; CHECK-NEXT: vpsrld $255, %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vpsrld $255, %xmm0, %xmm2 {%k1} {z} ; CHECK-NEXT: vpsrld $255, %xmm0, %xmm0 @@ -5952,8 
+5873,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32>, i8, <8 x i32>, i8
define <8 x i32>@test_int_x86_avx512_mask_psrl_di_256(<8 x i32> %x0, i8 %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrl_di_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsrld $255, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpsrld $255, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpsrld $255, %ymm0, %ymm0
@@ -5993,8 +5913,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64>, <2 x i64>, <2 x i64
define <2 x i64>@test_int_x86_avx512_mask_psrlv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv2_di:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
@@ -6014,8 +5933,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64>, <4 x i64>, <4 x i64
define <4 x i64>@test_int_x86_avx512_mask_psrlv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv4_di:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
@@ -6035,8 +5953,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32>, <4 x i32>, <4 x i32
define <4 x i32>@test_int_x86_avx512_mask_psrlv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv4_si:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
@@ -6056,8 +5973,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32>, <8 x i32>, <8 x i32
define <8 x i32>@test_int_x86_avx512_mask_psrlv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv8_si:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
@@ -6077,8 +5993,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32>, <4 x i32>, <4 x i3
define <4 x i32>@test_int_x86_avx512_mask_psra_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm0
@@ -6098,8 +6013,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32>, <4 x i32>, <8 x i3
define <8 x i32>@test_int_x86_avx512_mask_psra_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrad %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsrad %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsrad %xmm1, %ymm0, %ymm0
@@ -6119,8 +6033,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.psra.di.128(<4 x i32>, i8, <4 x i32>, i8
define <4 x i32>@test_int_x86_avx512_mask_psra_di_128(<4 x i32> %x0, i8 %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_di_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsrad $3, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpsrad $3, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpsrad $3, %xmm0, %xmm0
@@ -6140,8 +6053,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.psra.di.256(<8 x i32>, i8, <8 x i32>, i8
define <8 x i32>@test_int_x86_avx512_mask_psra_di_256(<8 x i32> %x0, i8 %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_di_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsrad $3, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpsrad $3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpsrad $3, %ymm0, %ymm0
@@ -6161,8 +6073,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64>, <2 x i64>, <2 x i6
define <2 x i64>@test_int_x86_avx512_mask_psra_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsraq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsraq %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsraq %xmm1, %xmm0, %xmm0
@@ -6182,8 +6093,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64>, <2 x i64>, <4 x i6
define <4 x i64>@test_int_x86_avx512_mask_psra_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsraq %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsraq %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsraq %xmm1, %ymm0, %ymm0
@@ -6203,8 +6113,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64>, i8, <2 x i64>, i8
define <2 x i64>@test_int_x86_avx512_mask_psra_qi_128(<2 x i64> %x0, i8 %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_qi_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsraq $3, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpsraq $3, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpsraq $3, %xmm0, %xmm0
@@ -6224,8 +6133,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64>, i8, <4 x i64>, i8
define <4 x i64>@test_int_x86_avx512_mask_psra_qi_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psra_qi_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsraq $3, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpsraq $3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpsraq $3, %ymm0, %ymm0
@@ -6246,8 +6154,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32>, <4 x i32>, <4 x i3
define <4 x i32>@test_int_x86_avx512_mask_psll_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm0
@@ -6267,8 +6174,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32>, <4 x i32>, <8 x i3
define <8 x i32>@test_int_x86_avx512_mask_psll_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpslld %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpslld %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpslld %xmm1, %ymm0, %ymm0
@@ -6288,8 +6194,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32>, i8, <4 x i32>, i8
define <4 x i32>@test_int_x86_avx512_mask_psll_di_128(<4 x i32> %x0, i8 %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_di_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpslld $3, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpslld $3, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpslld $3, %xmm0, %xmm0
@@ -6309,8 +6214,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32>, i8, <8 x i32>, i8
define <8 x i32>@test_int_x86_avx512_mask_psll_di_256(<8 x i32> %x0, i8 %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_di_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpslld $3, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpslld $3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpslld $3, %ymm0, %ymm0
@@ -6330,8 +6234,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64>, <2 x i64>, <4 x i6
define <4 x i64>@test_int_x86_avx512_mask_psll_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllq %xmm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsllq %xmm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsllq %xmm1, %ymm0, %ymm0
@@ -6351,8 +6254,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.psll.qi.128(<2 x i64>, i8, <2 x i64>, i8
define <2 x i64>@test_int_x86_avx512_mask_psll_qi_128(<2 x i64> %x0, i8 %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_qi_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsllq $3, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpsllq $3, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpsllq $3, %xmm0, %xmm0
@@ -6372,8 +6274,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.psll.qi.256(<4 x i64>, i8, <4 x i64>, i8
define <4 x i64>@test_int_x86_avx512_mask_psll_qi_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psll_qi_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpsllq $3, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpsllq $3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpsllq $3, %ymm0, %ymm0
@@ -6391,8 +6292,7 @@ define <4 x i64>@test_int_x86_avx512_mask_psll_qi_256(<4 x i64> %x0, i8 %x1, <4
define <8 x float> @test_mask_load_aligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_ps_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovaps (%rdi), %ymm0
; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1}
; CHECK-NEXT: vmovaps (%rdi), %ymm1 {%k1} {z}
@@ -6410,8 +6310,7 @@ declare <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8*, <8 x float>, i8)
define <8 x float> @test_mask_load_unaligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_ps_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovups (%rdi), %ymm0
; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1}
; CHECK-NEXT: vmovups (%rdi), %ymm1 {%k1} {z}
@@ -6429,8 +6328,7 @@ declare <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8*, <8 x float>, i8)
define <4 x double> @test_mask_load_aligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_pd_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovapd (%rdi), %ymm0
; CHECK-NEXT: vmovapd (%rdi), %ymm0 {%k1}
; CHECK-NEXT: vmovapd (%rdi), %ymm1 {%k1} {z}
@@ -6448,8 +6346,7 @@ declare <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8*, <4 x double>, i8)
define <4 x double> @test_mask_load_unaligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_pd_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovupd (%rdi), %ymm0
; CHECK-NEXT: vmovupd (%rdi), %ymm0 {%k1}
; CHECK-NEXT: vmovupd (%rdi), %ymm1 {%k1} {z}
@@ -6467,8 +6364,7 @@ declare <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8*, <4 x double>, i8)
define <4 x float> @test_mask_load_aligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_ps_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovaps (%rdi), %xmm0
; CHECK-NEXT: vmovaps (%rdi), %xmm0 {%k1}
; CHECK-NEXT: vmovaps (%rdi), %xmm1 {%k1} {z}
@@ -6486,8 +6382,7 @@ declare <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8*, <4 x float>, i8)
define <4 x float> @test_mask_load_unaligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_ps_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovups (%rdi), %xmm0
; CHECK-NEXT: vmovups (%rdi), %xmm0 {%k1}
; CHECK-NEXT: vmovups (%rdi), %xmm1 {%k1} {z}
@@ -6505,8 +6400,7 @@ declare <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8*, <4 x float>, i8)
define <2 x double> @test_mask_load_aligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_pd_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovapd (%rdi), %xmm0
; CHECK-NEXT: vmovapd (%rdi), %xmm0 {%k1}
; CHECK-NEXT: vmovapd (%rdi), %xmm1 {%k1} {z}
@@ -6524,8 +6418,7 @@ declare <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8*, <2 x double>, i8)
define <2 x double> @test_mask_load_unaligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_pd_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovupd (%rdi), %xmm0
; CHECK-NEXT: vmovupd (%rdi), %xmm0 {%k1}
; CHECK-NEXT: vmovupd (%rdi), %xmm1 {%k1} {z}
@@ -6545,8 +6438,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32>, <4 x i32>, <4 x i32
define <4 x i32>@test_int_x86_avx512_mask_psrav4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrav4_si:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm0
@@ -6566,8 +6458,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32>, <8 x i32>, <8 x i32
define <8 x i32>@test_int_x86_avx512_mask_psrav8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_si:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
@@ -6587,8 +6478,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64>, <2 x i64>, <2 x i
define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm0
@@ -6608,8 +6498,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64>, <4 x i64>, <4 x i
define <4 x i64>@test_int_x86_avx512_mask_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm0
@@ -6629,8 +6518,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64>, <2 x i64>, <2 x i64
define <2 x i64>@test_int_x86_avx512_mask_psllv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psllv2_di:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
@@ -6650,8 +6538,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64>, <4 x i64>, <4 x i64
define <4 x i64>@test_int_x86_avx512_mask_psllv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psllv4_di:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
@@ -6671,8 +6558,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32>, <4 x i32>, <4 x i32
define <4 x i32>@test_int_x86_avx512_mask_psllv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psllv4_si:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
@@ -6692,8 +6578,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32>, <8 x i32>, <8 x i32
define <8 x i32>@test_int_x86_avx512_mask_psllv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psllv8_si:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
@@ -6713,14 +6598,13 @@ declare <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32>, <4 x i32>, <4 x i
define <4 x i32>@test_int_x86_avx512_mask_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm3 {%k1} {z}
-; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
@@ -6734,14 +6618,13 @@ declare <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32>, <8 x i32>, <8 x i
define <8 x i32>@test_int_x86_avx512_mask_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1}
-; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm3 {%k1} {z}
-; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
@@ -6755,14 +6638,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64>, <2 x i64>, <2 x i
define <2 x i64>@test_int_x86_avx512_mask_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm3 {%k1} {z}
-; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
@@ -6776,14 +6658,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64>, <4 x i64>, <4 x i
define <4 x i64>@test_int_x86_avx512_mask_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1}
-; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm3 {%k1} {z}
-; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
@@ -6796,14 +6677,13 @@ declare <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32>, i8, <4 x i32>, i8)
define <4 x i32>@test_int_x86_avx512_mask_prol_d_128(<4 x i32> %x0, i8 %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prol_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprold $3, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vprold $3, %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vprold $3, %xmm0, %xmm0
-; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vprold $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vprold $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vprold $3, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i8 3, <4 x i32> zeroinitializer, i8 %x3)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 -1)
@@ -6817,14 +6697,13 @@ declare <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32>, i8, <8 x i32>, i8)
define <8 x i32>@test_int_x86_avx512_mask_prol_d_256(<8 x i32> %x0, i8 %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prol_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprold $3, %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vprold $3, %ymm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vprold $3, %ymm0, %ymm0
-; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vprold $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vprold $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vprold $3, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i8 3, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 -1)
@@ -6838,14 +6717,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64>, i8, <2 x i64>, i8)
define <2 x i64>@test_int_x86_avx512_mask_prol_q_128(<2 x i64> %x0, i8 %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prol_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprolq $3, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vprolq $3, %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vprolq $3, %xmm0, %xmm0
-; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vprolq $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vprolq $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vprolq $3, %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i8 3, <2 x i64> zeroinitializer, i8 %x3)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 -1)
@@ -6859,14 +6737,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64>, i8, <4 x i64>, i8)
define <4 x i64>@test_int_x86_avx512_mask_prol_q_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prol_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprolq $3, %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vprolq $3, %ymm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vprolq $3, %ymm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vprolq $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vprolq $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vprolq $3, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i8 3, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 -1)
@@ -6880,8 +6757,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8*, <4 x i32>, i8)
define <4 x i32> @test_mask_load_aligned_d_128(<4 x i32> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0
; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 {%k1}
; CHECK-NEXT: vmovdqa32 (%rdi), %xmm1 {%k1} {z}
@@ -6899,8 +6775,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8*, <8 x i32>, i8)
define <8 x i32> @test_mask_load_aligned_d_256(<8 x i32> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0
; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 {%k1}
; CHECK-NEXT: vmovdqa32 (%rdi), %ymm1 {%k1} {z}
@@ -6918,8 +6793,7 @@ declare <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8*, <2 x i64>, i8)
define <2 x i64> @test_mask_load_aligned_q_128(<2 x i64> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0
; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 {%k1}
; CHECK-NEXT: vmovdqa64 (%rdi), %xmm1 {%k1} {z}
@@ -6937,8 +6811,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8*, <4 x i64>, i8)
define <4 x i64> @test_mask_load_aligned_q_256(<4 x i64> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0
; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 {%k1}
; CHECK-NEXT: vmovdqa64 (%rdi), %ymm1 {%k1} {z}
@@ -6956,11 +6829,10 @@ declare <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32>, <4 x i32>, <4 x i
define <4 x i32>@test_int_x86_avx512_mask_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prolv_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm3 {%k1} {z}
-; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
@@ -6977,14 +6849,13 @@ declare <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32>, <8 x i32>, <8 x i
define <8 x i32>@test_int_x86_avx512_mask_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prolv_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1}
-; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm3 {%k1} {z}
-; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
@@ -6998,14 +6869,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64>, <2 x i64>, <2 x i
define <2 x i64>@test_int_x86_avx512_mask_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prolv_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm3 {%k1} {z}
-; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
@@ -7019,14 +6889,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64>, <4 x i64>, <4 x i
define <4 x i64>@test_int_x86_avx512_mask_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_prolv_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1}
-; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm3 {%k1} {z}
-; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
@@ -7040,14 +6909,13 @@ declare <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32>, i8, <4 x i32>, i8)
define <4 x i32>@test_int_x86_avx512_mask_pror_d_128(<4 x i32> %x0, i8 %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pror_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprord $3, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vprord $3, %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vprord $3, %xmm0, %xmm0
-; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vprord $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vprord $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vprord $3, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i8 3, <4 x i32> zeroinitializer, i8 %x3)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 -1)
@@ -7061,14 +6929,13 @@ declare <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32>, i8, <8 x i32>, i8)
define <8 x i32>@test_int_x86_avx512_mask_pror_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pror_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprord $3, %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vprord $3, %ymm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vprord $3, %ymm0, %ymm0
-; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vprord $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vprord $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vprord $3, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i8 3, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 -1)
@@ -7082,14 +6949,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64>, i8, <2 x i64>, i8)
define <2 x i64>@test_int_x86_avx512_mask_pror_q_128(<2 x i64> %x0, i8 %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pror_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprorq $3, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vprorq $3, %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vprorq $3, %xmm0, %xmm0
-; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vprorq $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vprorq $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vprorq $3, %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i8 3, <2 x i64> zeroinitializer, i8 %x3)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 -1)
@@ -7103,14 +6969,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64>, i8, <4 x i64>, i8)
define <4 x i64>@test_int_x86_avx512_mask_pror_q_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pror_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vprorq $3, %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vprorq $3, %ymm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vprorq $3, %ymm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vprorq $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vprorq $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vprorq $3, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i8 3, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 -1)
@@ -7124,14 +6989,13 @@ declare <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8>, <4 x i32>, i8)
define <4 x i32>@test_int_x86_avx512_mask_pmovzxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxbd %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vpmovzxbd %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxbd %xmm0, %xmm0
-; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxbd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovzxbd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxbd %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
@@ -7145,14 +7009,13 @@ declare <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8>, <8 x i32>, i8)
define <8 x i32>@test_int_x86_avx512_mask_pmovzxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxbd %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vpmovzxbd %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxbd %xmm0, %ymm0
-; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxbd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovzxbd %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxbd %xmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
@@ -7166,14 +7029,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8>, <2 x i64>, i8)
define <2 x i64>@test_int_x86_avx512_mask_pmovzxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxbq %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vpmovzxbq %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxbq %xmm0, %xmm0
-; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxbq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovzxbq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxbq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
@@ -7187,14 +7049,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8>, <4 x i64>, i8)
define <4 x i64>@test_int_x86_avx512_mask_pmovzxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxbq %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vpmovzxbq %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxbq %xmm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxbq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovzxbq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxbq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
@@ -7208,14 +7069,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32>, <2 x i64>, i8)
define <2 x i64>@test_int_x86_avx512_mask_pmovzxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxdq %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vpmovzxdq %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxdq %xmm0, %xmm0
-; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxdq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovzxdq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxdq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1)
@@ -7229,14 +7089,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32>, <4 x i64>, i8)
define <4 x i64>@test_int_x86_avx512_mask_pmovzxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxdq %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vpmovzxdq %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxdq %xmm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxdq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovzxdq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxdq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1)
@@ -7250,14 +7109,13 @@ declare <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16>, <4 x i32>, i8)
define <4 x i32>@test_int_x86_avx512_mask_pmovzxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxwd %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vpmovzxwd %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxwd %xmm0, %xmm0
-; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxwd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovzxwd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxwd %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
@@ -7271,14 +7129,13 @@ declare <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16>, <8 x i32>, i8)
define <8 x i32>@test_int_x86_avx512_mask_pmovzxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxwd %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vpmovzxwd %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxwd %xmm0, %ymm0
-; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxwd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovzxwd %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxwd %xmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
@@ -7292,14 +7149,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16>, <2 x i64>, i8)
define <2 x i64>@test_int_x86_avx512_mask_pmovzxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxwq %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vpmovzxwq %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxwq %xmm0, %xmm0
-; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxwq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovzxwq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxwq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
@@ -7313,14 +7169,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16>, <4 x i64>, i8)
define <4 x i64>@test_int_x86_avx512_mask_pmovzxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovzxwq %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vpmovzxwq %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpmovzxwq %xmm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxwq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovzxwq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxwq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
@@ -7334,14 +7189,13 @@ declare <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8>, <4 x i32>, i8)
define <4 x i32>@test_int_x86_avx512_mask_pmovsxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovsxbd %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vpmovsxbd %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpmovsxbd %xmm0, %xmm0
-; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxbd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsxbd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxbd %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
@@ -7355,14 +7209,13 @@ declare <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8>, <8 x i32>, i8)
define <8 x i32>@test_int_x86_avx512_mask_pmovsxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovsxbd %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vpmovsxbd %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpmovsxbd %xmm0, %ymm0
-; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxbd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovsxbd %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxbd %xmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
@@ -7376,14 +7229,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8>, <2 x i64>, i8)
define <2 x i64>@test_int_x86_avx512_mask_pmovsxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovsxbq %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vpmovsxbq %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpmovsxbq %xmm0, %xmm0
-; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxbq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsxbq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxbq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
@@ -7397,14 +7249,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8>, <4 x i64>, i8)
define <4 x i64>@test_int_x86_avx512_mask_pmovsxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovsxbq %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vpmovsxbq %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpmovsxbq %xmm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxbq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovsxbq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxbq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
@@ -7418,14 +7269,13 @@ declare <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16>, <4 x i32>, i8)
define <4 x i32>@test_int_x86_avx512_mask_pmovsxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vpmovsxwd %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
-; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsxwd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
@@ -7439,14 +7289,13 @@ declare <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16>, <8 x i32>, i8)
define <8 x i32>@test_int_x86_avx512_mask_pmovsxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovsxwd %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vpmovsxwd %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0
-; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxwd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovsxwd %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
@@ -7460,14 +7309,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16>, <2 x i64>, i8)
define <2 x i64>@test_int_x86_avx512_mask_pmovsxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovsxwq %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vpmovsxwq %xmm0, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0
-; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxwq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsxwq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
@@ -7481,14 +7329,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16>, <4 x i64>, i8)
define <4 x i64>@test_int_x86_avx512_mask_pmovsxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpmovsxwq %xmm0, %ymm1 {%k1}
-; CHECK-NEXT: vpmovsxwq %xmm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpmovsxwq %xmm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxwq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovsxwq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxwq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
@@ -7502,15 +7349,14 @@ declare <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double>, i8, <4 x do
define <4 x double>@test_int_x86_avx512_mask_perm_df_256(<4 x double> %x0, i8 %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_perm_df_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermpd $3, %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vpermpd $3, %ymm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpermpd $3, %ymm0, %ymm0
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpermpd $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vpermpd $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpermpd $3, %ymm0, %ymm0
; CHECK-NEXT: ## ymm0 = ymm0[3,0,0,0]
-; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i8 3, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i8 3, <4 x double> zeroinitializer, i8 %x3)
%res2 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i8 3, <4 x double> %x2, i8 -1)
@@ -7524,15 +7370,14 @@ declare <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64>, i8, <4 x i64>, i8
define <4 x i64>@test_int_x86_avx512_mask_perm_di_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_perm_di_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermq $3, %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vpermq $3, %ymm0, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpermq $3, %ymm0, %ymm0
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpermq $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vpermq $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpermq $3, %ymm0, %ymm0
; CHECK-NEXT: ## ymm0 = ymm0[3,0,0,0]
-; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i8 3, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 -1)
@@ -7545,14 +7390,13 @@ declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64
define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermpd %ymm1, %ymm0, %ymm2 {%k1}
-; CHECK-NEXT: vpermpd %ymm1, %ymm0, %ymm3 {%k1} {z}
-; CHECK-NEXT: vpermpd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermpd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpermpd %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpermpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
%res2 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
@@ -7566,14 +7410,13 @@ declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4
define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_di_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermq %ymm1, %ymm0, %ymm2 {%k1}
-; CHECK-NEXT: vpermq %ymm1, %ymm0, %ymm3 {%k1} {z}
-; CHECK-NEXT: vpermq %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermq %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpermq %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpermq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
%res2 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
@@ -7589,14 +7432,13 @@ declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>,
define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermps %ymm1, %ymm0, %ymm2 {%k1}
-; CHECK-NEXT: vpermps %ymm1, %ymm0, %ymm3 {%k1} {z}
-; CHECK-NEXT: vpermps %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermps %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpermps %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpermps %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
%res2 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
@@ -7610,14 +7452,13 @@ declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8
define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermd %ymm1, %ymm0, %ymm2 {%k1}
-; CHECK-NEXT: vpermd %ymm1, %ymm0, %ymm3 {%k1} {z}
-; CHECK-NEXT: vpermd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpermd %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpermd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
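
A note on the recurring change above: every hunk makes the same mechanical substitution. The old CHECK lines expected the i8 mask argument to be zero-extended first (movzbl %dil, %eax or movzbl %sil, %eax) and then copied into a mask register (kmovw %eax, %k1); the updated lines expect a single kmovw straight from the 32-bit argument register (kmovw %edi, %k1 or kmovw %esi, %k1). The zero-extension appears to be dead here because kmovw copies 16 bits into %k1 and these 2-, 4-, and 8-lane operations consult at most the low 8 mask bits, so whatever lands in bits 8-15 is ignored. A minimal sketch of the pattern, using a hypothetical test function written in the same style (the function name is invented and it is not part of this diff; the intrinsic and register assignments mirror the psrlv4_si test above):

; Hypothetical illustration only, not taken from this diff.
define <4 x i32> @kmovw_mask_example(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %m) {
; CHECK-LABEL: kmovw_mask_example:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2 {%k1}
; Before this change the expected mask setup was two instructions:
;   movzbl %dil, %eax
;   kmovw %eax, %k1
  %res = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %m)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)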