 llvm/include/llvm/IR/IntrinsicsX86.td                  | 21
 llvm/lib/IR/AutoUpgrade.cpp                            | 10
 llvm/lib/Target/X86/X86ISelLowering.cpp                | 11
 llvm/lib/Target/X86/X86IntrinsicsInfo.h                | 11
 llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll   | 30
 llvm/test/CodeGen/X86/avx512bw-intrinsics.ll           | 28
 llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll | 61
 llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll         | 48
 8 files changed, 167 insertions, 53 deletions
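
This commit removes the explicit masking from the dbpsadbw intrinsics and expresses the mask as a generic vector select in IR instead; old bitcode is rewritten by AutoUpgrade. At the IR level the declarations change as follows (128-bit variant shown, taken from the diff below; the 256- and 512-bit variants change the same way):

    ; Before: pass-through vector and mask are extra intrinsic operands.
    declare <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8>, <16 x i8>, i32, <8 x i16>, i8)

    ; After: the intrinsic only computes; masking is expressed separately.
    declare <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8>, <16 x i8>, i32)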
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 7b0c69912cd..89c900f45f2 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -4226,23 +4226,20 @@ let TargetPrefix = "x86" in {
                Intrinsic<[llvm_v32i16_ty], [llvm_v64i8_ty,
                           llvm_v64i8_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_dbpsadbw_128 :
-         GCCBuiltin<"__builtin_ia32_dbpsadbw128_mask">,
+  def int_x86_avx512_dbpsadbw_128 :
+         GCCBuiltin<"__builtin_ia32_dbpsadbw128">,
           Intrinsic<[llvm_v8i16_ty],
-          [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_v8i16_ty,
-           llvm_i8_ty], [IntrNoMem]>;
+                    [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_dbpsadbw_256 :
-         GCCBuiltin<"__builtin_ia32_dbpsadbw256_mask">,
+  def int_x86_avx512_dbpsadbw_256 :
+         GCCBuiltin<"__builtin_ia32_dbpsadbw256">,
           Intrinsic<[llvm_v16i16_ty],
-          [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty, llvm_v16i16_ty,
-           llvm_i16_ty], [IntrNoMem]>;
+                    [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_dbpsadbw_512 :
-         GCCBuiltin<"__builtin_ia32_dbpsadbw512_mask">,
+  def int_x86_avx512_dbpsadbw_512 :
+         GCCBuiltin<"__builtin_ia32_dbpsadbw512">,
           Intrinsic<[llvm_v32i16_ty],
-          [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty, llvm_v32i16_ty,
-           llvm_i32_ty], [IntrNoMem]>;
+                    [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty], [IntrNoMem]>;
 }
 
 // Gather and Scatter ops
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 609048e00ba..2bec93fc3c8 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -262,6 +262,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
       Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
       Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
       Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
+      Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
       Name.startswith("avx512.mask.add.p") || // Added in 7.0
       Name.startswith("avx512.mask.sub.p") || // Added in 7.0
       Name.startswith("avx512.mask.mul.p") || // Added in 7.0
@@ -1248,6 +1249,15 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
       IID = Intrinsic::x86_avx512_permvar_qi_512;
     else
       llvm_unreachable("Unexpected intrinsic");
+  } else if (Name.startswith("dbpsadbw.")) {
+    if (VecWidth == 128)
+      IID = Intrinsic::x86_avx512_dbpsadbw_128;
+    else if (VecWidth == 256)
+      IID = Intrinsic::x86_avx512_dbpsadbw_256;
+    else if (VecWidth == 512)
+      IID = Intrinsic::x86_avx512_dbpsadbw_512;
+    else
+      llvm_unreachable("Unexpected intrinsic");
   } else
     return false;
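
With the name added to ShouldUpgradeX86Intrinsic, upgradeAVX512MaskToSelect maps an old masked call onto the new unmasked intrinsic and rebuilds the masking as a bitcast plus select. A minimal sketch of the rewrite for the 512-bit variant (the value names %a, %b, %passthru, %mask are illustrative; the pattern itself matches the updated tests below):

    ; Old bitcode:
    ;   %r = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %a, <64 x i8> %b, i32 2, <32 x i16> %passthru, i32 %mask)
    ; is upgraded to:
    %v = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %a, <64 x i8> %b, i32 2)
    %m = bitcast i32 %mask to <32 x i1>
    %r = select <32 x i1> %m, <32 x i16> %v, <32 x i16> %passthru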
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fffde112f2e..a2bd46bd3be 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20315,8 +20315,15 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                          Op.getOperand(1), Op.getOperand(2));
     }
     case INTR_TYPE_3OP:
-      return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
-        Op.getOperand(2), Op.getOperand(3));
+    case INTR_TYPE_3OP_IMM8: {
+      SDValue Src3 = Op.getOperand(3);
+
+      if (IntrData->Type == INTR_TYPE_3OP_IMM8)
+        Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
+
+      return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
+                         Op.getOperand(1), Op.getOperand(2), Src3);
+    }
     case INTR_TYPE_4OP:
       return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
         Op.getOperand(2), Op.getOperand(3), Op.getOperand(4));
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 5381caa710d..138ffb2d4e0 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -23,7 +23,7 @@ enum IntrinsicType : uint16_t {
   INTR_NO_TYPE,
   GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASS, FPCLASSS,
   INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
-  INTR_TYPE_3OP_RM,
+  INTR_TYPE_3OP_RM, INTR_TYPE_3OP_IMM8,
   CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM,
   CVTPD2PS, CVTPD2PS_MASK,
   INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
@@ -392,6 +392,9 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_cvtusi2ss,   INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
   X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
   X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
+  X86_INTRINSIC_DATA(avx512_dbpsadbw_128, INTR_TYPE_3OP_IMM8, X86ISD::DBPSADBW, 0),
+  X86_INTRINSIC_DATA(avx512_dbpsadbw_256, INTR_TYPE_3OP_IMM8, X86ISD::DBPSADBW, 0),
+  X86_INTRINSIC_DATA(avx512_dbpsadbw_512, INTR_TYPE_3OP_IMM8, X86ISD::DBPSADBW, 0),
   X86_INTRINSIC_DATA(avx512_div_pd_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
   X86_INTRINSIC_DATA(avx512_div_ps_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
   X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
@@ -579,12 +582,6 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      ISD::UINT_TO_FP, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_512, INTR_TYPE_1OP_MASK,
                      ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
-  X86_INTRINSIC_DATA(avx512_mask_dbpsadbw_128, INTR_TYPE_3OP_IMM8_MASK,
-                     X86ISD::DBPSADBW, 0),
-  X86_INTRINSIC_DATA(avx512_mask_dbpsadbw_256, INTR_TYPE_3OP_IMM8_MASK,
-                     X86ISD::DBPSADBW, 0),
-  X86_INTRINSIC_DATA(avx512_mask_dbpsadbw_512, INTR_TYPE_3OP_IMM8_MASK,
-                     X86ISD::DBPSADBW, 0),
   X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK_RM,
                      X86ISD::FDIVS_RND, 0),
   X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM,
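
Two details on the lowering side: the new INTR_TYPE_3OP_IMM8 path truncates the intrinsic's i32 immediate to the i8 the DBPSADBW node carries (the tests below all encode imm 2 as the trailing 0x02 byte), and the select emitted by the upgrade is folded back into the instruction's mask operand during instruction selection. A sketch of what the updated tests verify, assuming AVX512BW+VL (register choices are up to the allocator):

    ; zero-masking written as a generic select ...
    %v = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %a, <16 x i8> %b, i32 2)
    %m = bitcast i8 %mask to <8 x i1>
    %z = select <8 x i1> %m, <8 x i16> %v, <8 x i16> zeroinitializer
    ; ... still selects to a single masked instruction:
    ;   vdbpsadbw $2, %xmm1, %xmm0, %xmm0 {%k1} {z}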
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
index 8c62bd59e88..aceddcb36f9 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
@@ -2858,3 +2858,33 @@ define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32
   %res2 = add <32 x i16> %res, %res1
   ret <32 x i16> %res2
 }
+
+declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
+; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
+; X86:       # %bb.0:
+; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02]
+; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
+; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02]
+; X86-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
+; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
+; X64:       # %bb.0:
+; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02]
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
+; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02]
+; X64-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
+; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4)
+  %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1)
+  %res3 = add <32 x i16> %res, %res1
+  %res4 = add <32 x i16> %res3, %res2
+  ret <32 x i16> %res4
+}
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
index 1f55a29dae4..80cb24a7083 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -1376,33 +1376,37 @@ define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i
   ret <16 x i32> %res2
 }
 
-declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32)
+declare <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8>, <64 x i8>, i32)
 
 define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
 ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
 ; X86:       # %bb.0:
+; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02]
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
 ; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
-; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x02]
-; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x02]
-; X86-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
+; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02]
+; X86-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
 ; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
 ; X64:       # %bb.0:
+; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02]
 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 ; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
-; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x02]
-; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x02]
-; X64-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
+; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02]
+; X64-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
 ; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)
-  %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4)
-  %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1)
-  %res3 = add <32 x i16> %res, %res1
-  %res4 = add <32 x i16> %res3, %res2
+  %1 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
+  %2 = bitcast i32 %x4 to <32 x i1>
+  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
+  %4 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
+  %5 = bitcast i32 %x4 to <32 x i1>
+  %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
+  %7 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
+  %res3 = add <32 x i16> %3, %6
+  %res4 = add <32 x i16> %res3, %7
   ret <32 x i16> %res4
 }
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
index 31a298b4cf2..e5aa1bea595 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
@@ -6284,3 +6284,64 @@ define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16
   %res2 = add <16 x i16> %res, %res1
   ret <16 x i16> %res2
 }
+
+declare <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8>, <16 x i8>, i32, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) {
+; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
+; X86:       # %bb.0:
+; X86-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd9,0x02]
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02]
+; X86-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xc1,0x02]
+; X86-NEXT:    vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
+; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
+; X64:       # %bb.0:
+; X64-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd9,0x02]
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02]
+; X64-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xc1,0x02]
+; X64-NEXT:    vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
+; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %res = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4)
+  %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> zeroinitializer, i8 %x4)
+  %res2 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 -1)
+  %res3 = add <8 x i16> %res, %res1
+  %res4 = add <8 x i16> %res2, %res3
+  ret <8 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8>, <32 x i8>, i32, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) {
+; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
+; X86:       # %bb.0:
+; X86-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd9,0x02]
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02]
+; X86-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xc1,0x02]
+; X86-NEXT:    vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
+; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
+; X64:       # %bb.0:
+; X64-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd9,0x02]
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02]
+; X64-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xc1,0x02]
+; X64-NEXT:    vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
+; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %res = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4)
+  %res1 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> zeroinitializer, i16 %x4)
+  %res2 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 -1)
+  %res3 = add <16 x i16> %res, %res1
+  %res4 = add <16 x i16> %res3, %res2
+  ret <16 x i16> %res4
+}
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index ec295f537a3..4e73a2f5735 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -3484,64 +3484,72 @@ define <16 x i16> @test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x
   ret <16 x i16> %res2
 }
 
-declare <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8>, <16 x i8>, i32, <8 x i16>, i8)
+declare <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8>, <16 x i8>, i32)
 
 define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) {
 ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
 ; X86:       # %bb.0:
+; X86-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd9,0x02]
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 ; X86-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02]
-; X86-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x02]
-; X86-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x02]
-; X86-NEXT:    vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
+; X86-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xc1,0x02]
+; X86-NEXT:    vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
 ; X64:       # %bb.0:
-; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 ; X64-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd9,0x02]
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 ; X64-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02]
 ; X64-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xc1,0x02]
 ; X64-NEXT:    vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4)
-  %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> zeroinitializer, i8 %x4)
-  %res2 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 -1)
-  %res3 = add <8 x i16> %res, %res1
-  %res4 = add <8 x i16> %res2, %res3
+  %1 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2)
+  %2 = bitcast i8 %x4 to <8 x i1>
+  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
+  %4 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2)
+  %5 = bitcast i8 %x4 to <8 x i1>
+  %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer
+  %7 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2)
+  %res3 = add <8 x i16> %3, %6
+  %res4 = add <8 x i16> %7, %res3
   ret <8 x i16> %res4
 }
 
-declare <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8>, <32 x i8>, i32, <16 x i16>, i16)
+declare <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8>, <32 x i8>, i32)
 
 define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
 ; X86:       # %bb.0:
+; X86-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd9,0x02]
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 ; X86-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02]
-; X86-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x02]
-; X86-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x02]
-; X86-NEXT:    vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
+; X86-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xc1,0x02]
+; X86-NEXT:    vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
 ; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
 ; X64:       # %bb.0:
-; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 ; X64-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd9,0x02]
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 ; X64-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02]
 ; X64-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xc1,0x02]
 ; X64-NEXT:    vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
 ; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4)
-  %res1 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> zeroinitializer, i16 %x4)
-  %res2 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 -1)
-  %res3 = add <16 x i16> %res, %res1
-  %res4 = add <16 x i16> %res3, %res2
+  %1 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2)
+  %2 = bitcast i16 %x4 to <16 x i1>
+  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
+  %4 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2)
+  %5 = bitcast i16 %x4 to <16 x i1>
+  %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer
+  %7 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2)
+  %res3 = add <16 x i16> %3, %6
+  %res4 = add <16 x i16> %res3, %7
   ret <16 x i16> %res4
 }
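
Putting it together, a merge-masked use of the new 256-bit intrinsic looks like this end to end (an illustrative, self-contained sketch, not one of the committed tests; the RUN line mirrors the style of these test files):

    ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s

    declare <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8>, <32 x i8>, i32)

    define <16 x i16> @dbsad_merge(<32 x i8> %a, <32 x i8> %b, <16 x i16> %passthru, i16 %mask) {
    ; CHECK: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1}
      %v = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %a, <32 x i8> %b, i32 2)
      %m = bitcast i16 %mask to <16 x i1>
      %r = select <16 x i1> %m, <16 x i16> %v, <16 x i16> %passthru
      ret <16 x i16> %r
    }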

