diff options
| author | Craig Topper <craig.topper@gmail.com> | 2016-09-04 18:13:33 +0000 | 
|---|---|---|
| committer | Craig Topper <craig.topper@gmail.com> | 2016-09-04 18:13:33 +0000 | 
| commit | 4177345d7fe6b186ed34400b1562fde4b47db1ec (patch) | |
| tree | 2898cdb41fc0a0cd528622fb0382ad7cf1a53b7f /llvm | |
| parent | 38c7927b6f6c54ea48144b3d39aaffb18c31aee4 (diff) | |
| download | bcm5719-llvm-4177345d7fe6b186ed34400b1562fde4b47db1ec.tar.gz bcm5719-llvm-4177345d7fe6b186ed34400b1562fde4b47db1ec.zip  | |
[AVX-512] Remove 128-bit and 256-bit masked floating point add/sub/mul/div intrinsics and upgrade to native IR.
llvm-svn: 280633
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/include/llvm/IR/IntrinsicsX86.td | 48 | ||||
| -rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 44 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 16 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll | 248 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics.ll | 256 | 
5 files changed, 296 insertions, 316 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 316749f9a6e..ad05850c56e 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -5233,75 +5233,27 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".  // Arithmetic ops  let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.". -  def int_x86_avx512_mask_add_ps_128 : GCCBuiltin<"__builtin_ia32_addps128_mask">, -          Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, -                     llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; -  def int_x86_avx512_mask_add_ps_256 : GCCBuiltin<"__builtin_ia32_addps256_mask">, -          Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, -                     llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;    def int_x86_avx512_mask_add_ps_512 : GCCBuiltin<"__builtin_ia32_addps512_mask">,            Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,                       llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; -  def int_x86_avx512_mask_add_pd_128 : GCCBuiltin<"__builtin_ia32_addpd128_mask">, -          Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, -                     llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; -  def int_x86_avx512_mask_add_pd_256 : GCCBuiltin<"__builtin_ia32_addpd256_mask">, -          Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, -                     llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;    def int_x86_avx512_mask_add_pd_512 : GCCBuiltin<"__builtin_ia32_addpd512_mask">,            Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,                       llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; -  def int_x86_avx512_mask_sub_ps_128 : GCCBuiltin<"__builtin_ia32_subps128_mask">, -          Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, -                     llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; -  def int_x86_avx512_mask_sub_ps_256 : GCCBuiltin<"__builtin_ia32_subps256_mask">, -          Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, -                     llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;    def int_x86_avx512_mask_sub_ps_512 : GCCBuiltin<"__builtin_ia32_subps512_mask">,            Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,                       llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; -  def int_x86_avx512_mask_sub_pd_128 : GCCBuiltin<"__builtin_ia32_subpd128_mask">, -          Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, -                     llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; -  def int_x86_avx512_mask_sub_pd_256 : GCCBuiltin<"__builtin_ia32_subpd256_mask">, -          Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, -                     llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;    def int_x86_avx512_mask_sub_pd_512 : GCCBuiltin<"__builtin_ia32_subpd512_mask">,            Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,                       llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; -  def int_x86_avx512_mask_mul_ps_128 : GCCBuiltin<"__builtin_ia32_mulps_mask">, -          Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, -                     llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; -  def int_x86_avx512_mask_mul_ps_256 : GCCBuiltin<"__builtin_ia32_mulps256_mask">, -          Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, -                     llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;    def int_x86_avx512_mask_mul_ps_512 : GCCBuiltin<"__builtin_ia32_mulps512_mask">,            Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,                       llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; -  def int_x86_avx512_mask_mul_pd_128 : GCCBuiltin<"__builtin_ia32_mulpd_mask">, -          Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, -                     llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; -  def int_x86_avx512_mask_mul_pd_256 : GCCBuiltin<"__builtin_ia32_mulpd256_mask">, -          Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, -                     llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;    def int_x86_avx512_mask_mul_pd_512 : GCCBuiltin<"__builtin_ia32_mulpd512_mask">,            Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,                       llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; -  def int_x86_avx512_mask_div_ps_128 : GCCBuiltin<"__builtin_ia32_divps_mask">, -          Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, -                     llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; -  def int_x86_avx512_mask_div_ps_256 : GCCBuiltin<"__builtin_ia32_divps256_mask">, -          Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, -                     llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;    def int_x86_avx512_mask_div_ps_512 : GCCBuiltin<"__builtin_ia32_divps512_mask">,            Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,                       llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; -  def int_x86_avx512_mask_div_pd_128 : GCCBuiltin<"__builtin_ia32_divpd_mask">, -          Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, -                     llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; -  def int_x86_avx512_mask_div_pd_256 : GCCBuiltin<"__builtin_ia32_divpd256_mask">, -          Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, -                     llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;    def int_x86_avx512_mask_div_pd_512 : GCCBuiltin<"__builtin_ia32_divpd512_mask">,            Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,                       llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 5f62802654e..79a8d18126d 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -274,6 +274,22 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {           Name.startswith("avx512.mask.padd.") ||           Name.startswith("avx512.mask.psub.") ||           Name.startswith("avx512.mask.pmull.") || +         Name.startswith("avx512.mask.add.pd.128") || +         Name.startswith("avx512.mask.add.pd.256") || +         Name.startswith("avx512.mask.add.ps.128") || +         Name.startswith("avx512.mask.add.ps.256") || +         Name.startswith("avx512.mask.div.pd.128") || +         Name.startswith("avx512.mask.div.pd.256") || +         Name.startswith("avx512.mask.div.ps.128") || +         Name.startswith("avx512.mask.div.ps.256") || +         Name.startswith("avx512.mask.mul.pd.128") || +         Name.startswith("avx512.mask.mul.pd.256") || +         Name.startswith("avx512.mask.mul.ps.128") || +         Name.startswith("avx512.mask.mul.ps.256") || +         Name.startswith("avx512.mask.sub.pd.128") || +         Name.startswith("avx512.mask.sub.pd.256") || +         Name.startswith("avx512.mask.sub.ps.128") || +         Name.startswith("avx512.mask.sub.ps.256") ||           Name.startswith("sse41.pmovsx") ||           Name.startswith("sse41.pmovzx") ||           Name.startswith("avx2.pmovsx") || @@ -1249,6 +1265,34 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {        Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,                            CI->getArgOperand(2)); +    } else if (IsX86 && (Name.startswith("avx512.mask.add.pd.128") || +                         Name.startswith("avx512.mask.add.pd.256") || +                         Name.startswith("avx512.mask.add.ps.128") || +                         Name.startswith("avx512.mask.add.ps.256"))) { +      Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1)); +      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, +                          CI->getArgOperand(2)); +    } else if (IsX86 && (Name.startswith("avx512.mask.div.pd.128") || +                         Name.startswith("avx512.mask.div.pd.256") || +                         Name.startswith("avx512.mask.div.ps.128") || +                         Name.startswith("avx512.mask.div.ps.256"))) { +      Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1)); +      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, +                          CI->getArgOperand(2)); +    } else if (IsX86 && (Name.startswith("avx512.mask.mul.pd.128") || +                         Name.startswith("avx512.mask.mul.pd.256") || +                         Name.startswith("avx512.mask.mul.ps.128") || +                         Name.startswith("avx512.mask.mul.ps.256"))) { +      Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1)); +      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, +                          CI->getArgOperand(2)); +    } else if (IsX86 && (Name.startswith("avx512.mask.sub.pd.128") || +                         Name.startswith("avx512.mask.sub.pd.256") || +                         Name.startswith("avx512.mask.sub.ps.128") || +                         Name.startswith("avx512.mask.sub.ps.256"))) { +      Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1)); +      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, +                          CI->getArgOperand(2));      } else {        llvm_unreachable("Unknown function for CallInst upgrade.");      } diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index c383f313b1c..11ca28ac321 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -378,12 +378,8 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {    X86_INTRINSIC_DATA(avx512_kunpck_dq, KUNPCK, ISD::CONCAT_VECTORS, 0),    X86_INTRINSIC_DATA(avx512_kunpck_wd, KUNPCK, ISD::CONCAT_VECTORS, 0), -  X86_INTRINSIC_DATA(avx512_mask_add_pd_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0), -  X86_INTRINSIC_DATA(avx512_mask_add_pd_256, INTR_TYPE_2OP_MASK, ISD::FADD, 0),    X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD,    X86ISD::FADD_RND), -  X86_INTRINSIC_DATA(avx512_mask_add_ps_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0), -  X86_INTRINSIC_DATA(avx512_mask_add_ps_256, INTR_TYPE_2OP_MASK, ISD::FADD, 0),    X86_INTRINSIC_DATA(avx512_mask_add_ps_512, INTR_TYPE_2OP_MASK, ISD::FADD,    X86ISD::FADD_RND),    X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FADD, @@ -650,12 +646,8 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {                       X86ISD::DBPSADBW, 0),    X86_INTRINSIC_DATA(avx512_mask_dbpsadbw_512, INTR_TYPE_3OP_IMM8_MASK,                       X86ISD::DBPSADBW, 0), -  X86_INTRINSIC_DATA(avx512_mask_div_pd_128, INTR_TYPE_2OP_MASK, ISD::FDIV, 0), -  X86_INTRINSIC_DATA(avx512_mask_div_pd_256, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),    X86_INTRINSIC_DATA(avx512_mask_div_pd_512, INTR_TYPE_2OP_MASK, ISD::FDIV,                       X86ISD::FDIV_RND), -  X86_INTRINSIC_DATA(avx512_mask_div_ps_128, INTR_TYPE_2OP_MASK, ISD::FDIV, 0), -  X86_INTRINSIC_DATA(avx512_mask_div_ps_256, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),    X86_INTRINSIC_DATA(avx512_mask_div_ps_512, INTR_TYPE_2OP_MASK, ISD::FDIV,                       X86ISD::FDIV_RND),    X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FDIV, @@ -798,12 +790,8 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {                       X86ISD::MOVSD, 0),    X86_INTRINSIC_DATA(avx512_mask_move_ss, INTR_TYPE_SCALAR_MASK,                       X86ISD::MOVSS, 0), -  X86_INTRINSIC_DATA(avx512_mask_mul_pd_128, INTR_TYPE_2OP_MASK, ISD::FMUL, 0), -  X86_INTRINSIC_DATA(avx512_mask_mul_pd_256, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),    X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL,                       X86ISD::FMUL_RND), -  X86_INTRINSIC_DATA(avx512_mask_mul_ps_128, INTR_TYPE_2OP_MASK, ISD::FMUL, 0), -  X86_INTRINSIC_DATA(avx512_mask_mul_ps_256, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),    X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL,                       X86ISD::FMUL_RND),    X86_INTRINSIC_DATA(avx512_mask_mul_sd_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FMUL, @@ -1388,12 +1376,8 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {                       X86ISD::FSQRT_RND, 0),    X86_INTRINSIC_DATA(avx512_mask_sqrt_ss, INTR_TYPE_SCALAR_MASK_RM,                       X86ISD::FSQRT_RND, 0), -  X86_INTRINSIC_DATA(avx512_mask_sub_pd_128, INTR_TYPE_2OP_MASK, ISD::FSUB, 0), -  X86_INTRINSIC_DATA(avx512_mask_sub_pd_256, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),    X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB,                       X86ISD::FSUB_RND), -  X86_INTRINSIC_DATA(avx512_mask_sub_ps_128, INTR_TYPE_2OP_MASK, ISD::FSUB, 0), -  X86_INTRINSIC_DATA(avx512_mask_sub_ps_256, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),    X86_INTRINSIC_DATA(avx512_mask_sub_ps_512, INTR_TYPE_2OP_MASK, ISD::FSUB,                       X86ISD::FSUB_RND),    X86_INTRINSIC_DATA(avx512_mask_sub_sd_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FSUB, diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll index 52a8099b7a1..3a7f46a8f2c 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -2935,3 +2935,251 @@ define <8 x i32> @test_mask_add_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %m  declare <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) +define <8 x float> @test_mm512_maskz_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { +; CHECK-LABEL: test_mm512_maskz_add_ps_256: +; CHECK:       ## BB#0: +; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x58,0xc1] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) +  ret <8 x float> %res +} + +define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { +; CHECK-LABEL: test_mm512_mask_add_ps_256: +; CHECK:       ## BB#0: +; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x58,0xd1] +; CHECK-NEXT:    vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) +  ret <8 x float> %res +} + +define <8 x float> @test_mm512_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { +; CHECK-LABEL: test_mm512_add_ps_256: +; CHECK:       ## BB#0: +; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) +  ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) + +define <4 x float> @test_mm512_maskz_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { +; CHECK-LABEL: test_mm512_maskz_add_ps_128: +; CHECK:       ## BB#0: +; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x58,0xc1] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) +  ret <4 x float> %res +} + +define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { +; CHECK-LABEL: test_mm512_mask_add_ps_128: +; CHECK:       ## BB#0: +; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x58,0xd1] +; CHECK-NEXT:    vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) +  ret <4 x float> %res +} + +define <4 x float> @test_mm512_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { +; CHECK-LABEL: test_mm512_add_ps_128: +; CHECK:       ## BB#0: +; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) +  ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) + +define <8 x float> @test_mm512_maskz_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { +; CHECK-LABEL: test_mm512_maskz_sub_ps_256: +; CHECK:       ## BB#0: +; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT:    vsubps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x5c,0xc1] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) +  ret <8 x float> %res +} + +define <8 x float> @test_mm512_mask_sub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { +; CHECK-LABEL: test_mm512_mask_sub_ps_256: +; CHECK:       ## BB#0: +; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT:    vsubps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5c,0xd1] +; CHECK-NEXT:    vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) +  ret <8 x float> %res +} + +define <8 x float> @test_mm512_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { +; CHECK-LABEL: test_mm512_sub_ps_256: +; CHECK:       ## BB#0: +; CHECK-NEXT:    vsubps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5c,0xc1] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) +  ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) + +define <4 x float> @test_mm512_maskz_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { +; CHECK-LABEL: test_mm512_maskz_sub_ps_128: +; CHECK:       ## BB#0: +; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT:    vsubps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x5c,0xc1] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) +  ret <4 x float> %res +} + +define <4 x float> @test_mm512_mask_sub_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { +; CHECK-LABEL: test_mm512_mask_sub_ps_128: +; CHECK:       ## BB#0: +; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT:    vsubps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5c,0xd1] +; CHECK-NEXT:    vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) +  ret <4 x float> %res +} + +define <4 x float> @test_mm512_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { +; CHECK-LABEL: test_mm512_sub_ps_128: +; CHECK:       ## BB#0: +; CHECK-NEXT:    vsubps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5c,0xc1] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) +  ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) + +define <8 x float> @test_mm512_maskz_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { +; CHECK-LABEL: test_mm512_maskz_mul_ps_256: +; CHECK:       ## BB#0: +; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT:    vmulps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x59,0xc1] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) +  ret <8 x float> %res +} + +define <8 x float> @test_mm512_mask_mul_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { +; CHECK-LABEL: test_mm512_mask_mul_ps_256: +; CHECK:       ## BB#0: +; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT:    vmulps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x59,0xd1] +; CHECK-NEXT:    vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) +  ret <8 x float> %res +} + +define <8 x float> @test_mm512_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { +; CHECK-LABEL: test_mm512_mul_ps_256: +; CHECK:       ## BB#0: +; CHECK-NEXT:    vmulps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x59,0xc1] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) +  ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) + +define <4 x float> @test_mm512_maskz_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { +; CHECK-LABEL: test_mm512_maskz_mul_ps_128: +; CHECK:       ## BB#0: +; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT:    vmulps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x59,0xc1] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) +  ret <4 x float> %res +} + +define <4 x float> @test_mm512_mask_mul_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { +; CHECK-LABEL: test_mm512_mask_mul_ps_128: +; CHECK:       ## BB#0: +; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT:    vmulps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x59,0xd1] +; CHECK-NEXT:    vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) +  ret <4 x float> %res +} + +define <4 x float> @test_mm512_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { +; CHECK-LABEL: test_mm512_mul_ps_128: +; CHECK:       ## BB#0: +; CHECK-NEXT:    vmulps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x59,0xc1] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) +  ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) + +define <8 x float> @test_mm512_maskz_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { +; CHECK-LABEL: test_mm512_maskz_div_ps_256: +; CHECK:       ## BB#0: +; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT:    vdivps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x5e,0xc1] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) +  ret <8 x float> %res +} + +define <8 x float> @test_mm512_mask_div_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { +; CHECK-LABEL: test_mm512_mask_div_ps_256: +; CHECK:       ## BB#0: +; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT:    vdivps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5e,0xd1] +; CHECK-NEXT:    vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) +  ret <8 x float> %res +} + +define <8 x float> @test_mm512_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { +; CHECK-LABEL: test_mm512_div_ps_256: +; CHECK:       ## BB#0: +; CHECK-NEXT:    vdivps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5e,0xc1] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) +  ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) + +define <4 x float> @test_mm512_maskz_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { +; CHECK-LABEL: test_mm512_maskz_div_ps_128: +; CHECK:       ## BB#0: +; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT:    vdivps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x5e,0xc1] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) +  ret <4 x float> %res +} + +define <4 x float> @test_mm512_mask_div_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { +; CHECK-LABEL: test_mm512_mask_div_ps_128: +; CHECK:       ## BB#0: +; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT:    vdivps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5e,0xd1] +; CHECK-NEXT:    vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) +  ret <4 x float> %res +} + +define <4 x float> @test_mm512_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { +; CHECK-LABEL: test_mm512_div_ps_128: +; CHECK:       ## BB#0: +; CHECK-NEXT:    vdivps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5e,0xc1] +; CHECK-NEXT:    retq ## encoding: [0xc3] +  %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) +  ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) + diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index 6d35c828dcc..9a913186315 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -1483,254 +1483,6 @@ define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {   }   declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> , <2 x double> , i32, i8) -define <8 x float> @test_mm512_maskz_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { -; CHECK-LABEL: test_mm512_maskz_add_ps_256: -; CHECK:       ## BB#0: -; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x58,0xc1] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) -  ret <8 x float> %res -} - -define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { -; CHECK-LABEL: test_mm512_mask_add_ps_256: -; CHECK:       ## BB#0: -; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x58,0xd1] -; CHECK-NEXT:    vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) -  ret <8 x float> %res -} - -define <8 x float> @test_mm512_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { -; CHECK-LABEL: test_mm512_add_ps_256: -; CHECK:       ## BB#0: -; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) -  ret <8 x float> %res -} -declare <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) - -define <4 x float> @test_mm512_maskz_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { -; CHECK-LABEL: test_mm512_maskz_add_ps_128: -; CHECK:       ## BB#0: -; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x58,0xc1] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) -  ret <4 x float> %res -} - -define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { -; CHECK-LABEL: test_mm512_mask_add_ps_128: -; CHECK:       ## BB#0: -; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x58,0xd1] -; CHECK-NEXT:    vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) -  ret <4 x float> %res -} - -define <4 x float> @test_mm512_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { -; CHECK-LABEL: test_mm512_add_ps_128: -; CHECK:       ## BB#0: -; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) -  ret <4 x float> %res -} -declare <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) - -define <8 x float> @test_mm512_maskz_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { -; CHECK-LABEL: test_mm512_maskz_sub_ps_256: -; CHECK:       ## BB#0: -; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT:    vsubps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x5c,0xc1] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) -  ret <8 x float> %res -} - -define <8 x float> @test_mm512_mask_sub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { -; CHECK-LABEL: test_mm512_mask_sub_ps_256: -; CHECK:       ## BB#0: -; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT:    vsubps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5c,0xd1] -; CHECK-NEXT:    vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) -  ret <8 x float> %res -} - -define <8 x float> @test_mm512_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { -; CHECK-LABEL: test_mm512_sub_ps_256: -; CHECK:       ## BB#0: -; CHECK-NEXT:    vsubps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5c,0xc1] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) -  ret <8 x float> %res -} -declare <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) - -define <4 x float> @test_mm512_maskz_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { -; CHECK-LABEL: test_mm512_maskz_sub_ps_128: -; CHECK:       ## BB#0: -; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT:    vsubps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x5c,0xc1] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) -  ret <4 x float> %res -} - -define <4 x float> @test_mm512_mask_sub_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { -; CHECK-LABEL: test_mm512_mask_sub_ps_128: -; CHECK:       ## BB#0: -; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT:    vsubps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5c,0xd1] -; CHECK-NEXT:    vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) -  ret <4 x float> %res -} - -define <4 x float> @test_mm512_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { -; CHECK-LABEL: test_mm512_sub_ps_128: -; CHECK:       ## BB#0: -; CHECK-NEXT:    vsubps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5c,0xc1] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) -  ret <4 x float> %res -} -declare <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) - -define <8 x float> @test_mm512_maskz_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { -; CHECK-LABEL: test_mm512_maskz_mul_ps_256: -; CHECK:       ## BB#0: -; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT:    vmulps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x59,0xc1] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) -  ret <8 x float> %res -} - -define <8 x float> @test_mm512_mask_mul_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { -; CHECK-LABEL: test_mm512_mask_mul_ps_256: -; CHECK:       ## BB#0: -; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT:    vmulps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x59,0xd1] -; CHECK-NEXT:    vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) -  ret <8 x float> %res -} - -define <8 x float> @test_mm512_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { -; CHECK-LABEL: test_mm512_mul_ps_256: -; CHECK:       ## BB#0: -; CHECK-NEXT:    vmulps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x59,0xc1] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) -  ret <8 x float> %res -} -declare <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) - -define <4 x float> @test_mm512_maskz_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { -; CHECK-LABEL: test_mm512_maskz_mul_ps_128: -; CHECK:       ## BB#0: -; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT:    vmulps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x59,0xc1] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) -  ret <4 x float> %res -} - -define <4 x float> @test_mm512_mask_mul_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { -; CHECK-LABEL: test_mm512_mask_mul_ps_128: -; CHECK:       ## BB#0: -; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT:    vmulps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x59,0xd1] -; CHECK-NEXT:    vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) -  ret <4 x float> %res -} - -define <4 x float> @test_mm512_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { -; CHECK-LABEL: test_mm512_mul_ps_128: -; CHECK:       ## BB#0: -; CHECK-NEXT:    vmulps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x59,0xc1] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) -  ret <4 x float> %res -} -declare <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) - -define <8 x float> @test_mm512_maskz_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { -; CHECK-LABEL: test_mm512_maskz_div_ps_256: -; CHECK:       ## BB#0: -; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT:    vdivps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x5e,0xc1] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) -  ret <8 x float> %res -} - -define <8 x float> @test_mm512_mask_div_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { -; CHECK-LABEL: test_mm512_mask_div_ps_256: -; CHECK:       ## BB#0: -; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT:    vdivps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5e,0xd1] -; CHECK-NEXT:    vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) -  ret <8 x float> %res -} - -define <8 x float> @test_mm512_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { -; CHECK-LABEL: test_mm512_div_ps_256: -; CHECK:       ## BB#0: -; CHECK-NEXT:    vdivps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5e,0xc1] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) -  ret <8 x float> %res -} -declare <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) - -define <4 x float> @test_mm512_maskz_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { -; CHECK-LABEL: test_mm512_maskz_div_ps_128: -; CHECK:       ## BB#0: -; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT:    vdivps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x5e,0xc1] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) -  ret <4 x float> %res -} - -define <4 x float> @test_mm512_mask_div_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { -; CHECK-LABEL: test_mm512_mask_div_ps_128: -; CHECK:       ## BB#0: -; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT:    vdivps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5e,0xd1] -; CHECK-NEXT:    vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) -  ret <4 x float> %res -} - -define <4 x float> @test_mm512_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { -; CHECK-LABEL: test_mm512_div_ps_128: -; CHECK:       ## BB#0: -; CHECK-NEXT:    vdivps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5e,0xc1] -; CHECK-NEXT:    retq ## encoding: [0xc3] -  %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) -  ret <4 x float> %res -} -declare <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) -  define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {  ; CHECK-LABEL: test_mm512_maskz_max_ps_256:  ; CHECK:       ## BB#0: @@ -5532,9 +5284,9 @@ define <8 x i32>@test_int_x86_avx512_mask_psrav8_si_const() {  ; CHECK:       ## BB#0:  ; CHECK-NEXT:    vmovdqa32 {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]  ; CHECK-NEXT:    ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x05,A,A,A,A] -; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI335_0-4, kind: reloc_riprel_4byte +; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI311_0-4, kind: reloc_riprel_4byte  ; CHECK-NEXT:    vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0x05,A,A,A,A] -; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI335_1-4, kind: reloc_riprel_4byte +; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI311_1-4, kind: reloc_riprel_4byte  ; CHECK-NEXT:    retq ## encoding: [0xc3]    %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>, <8 x i32> zeroinitializer, i8 -1)    ret <8 x i32> %res @@ -5565,9 +5317,9 @@ define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128_const(i8 %x3) {  ; CHECK:       ## BB#0:  ; CHECK-NEXT:    vmovdqa64 {{.*#+}} xmm0 = [2,18446744073709551607]  ; CHECK-NEXT:    ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x05,A,A,A,A] -; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI337_0-4, kind: reloc_riprel_4byte +; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI313_0-4, kind: reloc_riprel_4byte  ; CHECK-NEXT:    vpsravq {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A] -; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI337_1-4, kind: reloc_riprel_4byte +; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI313_1-4, kind: reloc_riprel_4byte  ; CHECK-NEXT:    retq ## encoding: [0xc3]    %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> <i64 2, i64 -9>, <2 x i64> <i64 1, i64 90>, <2 x i64> zeroinitializer, i8 -1)    ret <2 x i64> %res  | 

